Diffstat (limited to 'include/uapi/drm')
-rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 42
-rw-r--r-- | include/uapi/drm/amdxdna_accel.h | 501
-rw-r--r-- | include/uapi/drm/drm.h | 17
-rw-r--r-- | include/uapi/drm/drm_fourcc.h | 85
-rw-r--r-- | include/uapi/drm/drm_mode.h | 21
-rw-r--r-- | include/uapi/drm/etnaviv_drm.h | 5
-rw-r--r-- | include/uapi/drm/i915_drm.h | 58
-rw-r--r-- | include/uapi/drm/ivpu_accel.h | 162
-rw-r--r-- | include/uapi/drm/msm_drm.h | 9
-rw-r--r-- | include/uapi/drm/nouveau_drm.h | 29
-rw-r--r-- | include/uapi/drm/panfrost_drm.h | 3
-rw-r--r-- | include/uapi/drm/panthor_drm.h | 1028
-rw-r--r-- | include/uapi/drm/qaic_accel.h | 2
-rw-r--r-- | include/uapi/drm/v3d_drm.h | 67
-rw-r--r-- | include/uapi/drm/xe_drm.h | 630
15 files changed, 2621 insertions, 38 deletions
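
The hunks below are userspace ABI headers. For reference, a minimal sketch of how a client might exercise the new DRM_IOCTL_SET_CLIENT_NAME ioctl added in the drm.h hunk further down; the device path, client name string, and error handling are illustrative assumptions rather than part of the patch.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/drm.h>

int main(void)
{
	const char *name = "my-compositor";        /* illustrative client name */
	int fd = open("/dev/dri/card0", O_RDWR);   /* assumed DRM device node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct drm_set_client_name args = {
		.name_len = strlen(name),              /* length without the trailing NUL */
		.name = (__u64)(uintptr_t)name,        /* user pointer to the name string */
	};

	if (ioctl(fd, DRM_IOCTL_SET_CLIENT_NAME, &args) != 0)
		perror("DRM_IOCTL_SET_CLIENT_NAME");   /* fails for too-long or non-printable names */

	close(fd);
	return 0;
}

Per the ioctl documentation in the drm.h hunk, the name must be at most DRM_CLIENT_NAME_MAX_LEN bytes (excluding the NUL terminator) and must not contain whitespace or non-printable characters.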
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 96e32dafd4f0..25d5c6e90a99 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -171,6 +171,8 @@ extern "C" { * may override the MTYPE selected in AMDGPU_VA_OP_MAP. */ #define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -392,7 +394,7 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 -/* GFX9 and later: */ +/* GFX9 - GFX11: */ #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f #define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 @@ -406,6 +408,24 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_SCANOUT_SHIFT 63 #define AMDGPU_TILING_SCANOUT_MASK 0x1 +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 + /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) @@ -743,6 +763,16 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_IDS_FLAGS_TMZ 0x4 #define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 + /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 /* get the crtc_id from the mode object id? */ @@ -1268,6 +1298,16 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ + +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. 
+ */ + __u64 matrix[12]; +}; #if defined(__cplusplus) } diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h new file mode 100644 index 000000000000..a706ead39082 --- /dev/null +++ b/include/uapi/drm/amdxdna_accel.h @@ -0,0 +1,501 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _UAPI_AMDXDNA_ACCEL_H_ +#define _UAPI_AMDXDNA_ACCEL_H_ + +#include <linux/stddef.h> +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) +#define AMDXDNA_INVALID_ADDR (~0UL) +#define AMDXDNA_INVALID_CTX_HANDLE 0 +#define AMDXDNA_INVALID_BO_HANDLE 0 +#define AMDXDNA_INVALID_FENCE_HANDLE 0 + +enum amdxdna_device_type { + AMDXDNA_DEV_TYPE_UNKNOWN = -1, + AMDXDNA_DEV_TYPE_KMQ, +}; + +enum amdxdna_drm_ioctl_id { + DRM_AMDXDNA_CREATE_HWCTX, + DRM_AMDXDNA_DESTROY_HWCTX, + DRM_AMDXDNA_CONFIG_HWCTX, + DRM_AMDXDNA_CREATE_BO, + DRM_AMDXDNA_GET_BO_INFO, + DRM_AMDXDNA_SYNC_BO, + DRM_AMDXDNA_EXEC_CMD, + DRM_AMDXDNA_GET_INFO, + DRM_AMDXDNA_SET_STATE, +}; + +/** + * struct qos_info - QoS information for driver. + * @gops: Giga operations per second. + * @fps: Frames per second. + * @dma_bandwidth: DMA bandwidtha. + * @latency: Frame response latency. + * @frame_exec_time: Frame execution time. + * @priority: Request priority. + * + * User program can provide QoS hints to driver. + */ +struct amdxdna_qos_info { + __u32 gops; + __u32 fps; + __u32 dma_bandwidth; + __u32 latency; + __u32 frame_exec_time; + __u32 priority; +}; + +/** + * struct amdxdna_drm_create_hwctx - Create hardware context. + * @ext: MBZ. + * @ext_flags: MBZ. + * @qos_p: Address of QoS info. + * @umq_bo: BO handle for user mode queue(UMQ). + * @log_buf_bo: BO handle for log buffer. + * @max_opc: Maximum operations per cycle. + * @num_tiles: Number of AIE tiles. + * @mem_size: Size of AIE tile memory. + * @umq_doorbell: Returned offset of doorbell associated with UMQ. + * @handle: Returned hardware context handle. + * @syncobj_handle: Returned syncobj handle for command completion. + */ +struct amdxdna_drm_create_hwctx { + __u64 ext; + __u64 ext_flags; + __u64 qos_p; + __u32 umq_bo; + __u32 log_buf_bo; + __u32 max_opc; + __u32 num_tiles; + __u32 mem_size; + __u32 umq_doorbell; + __u32 handle; + __u32 syncobj_handle; +}; + +/** + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context. + * @handle: Hardware context handle. + * @pad: MBZ. + */ +struct amdxdna_drm_destroy_hwctx { + __u32 handle; + __u32 pad; +}; + +/** + * struct amdxdna_cu_config - configuration for one CU + * @cu_bo: CU configuration buffer bo handle. + * @cu_func: Function of a CU. + * @pad: MBZ. + */ +struct amdxdna_cu_config { + __u32 cu_bo; + __u8 cu_func; + __u8 pad[3]; +}; + +/** + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware context + * @num_cus: Number of CUs to configure. + * @pad: MBZ. + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config. + */ +struct amdxdna_hwctx_param_config_cu { + __u16 num_cus; + __u16 pad[3]; + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +}; + +enum amdxdna_drm_config_hwctx_param { + DRM_AMDXDNA_HWCTX_CONFIG_CU, + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF, + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF, +}; + +/** + * struct amdxdna_drm_config_hwctx - Configure hardware context. + * @handle: hardware context handle. + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the + * structure passed in via param_val. 
+ * @param_val: A structure specified by the param_type struct member. + * @param_val_size: Size of the parameter buffer pointed to by the param_val. + * If param_val is not a pointer, driver can ignore this. + * @pad: MBZ. + * + * Note: if the param_val is a pointer pointing to a buffer, the maximum size + * of the buffer is 4KiB(PAGE_SIZE). + */ +struct amdxdna_drm_config_hwctx { + __u32 handle; + __u32 param_type; + __u64 param_val; + __u32 param_val_size; + __u32 pad; +}; + +enum amdxdna_bo_type { + AMDXDNA_BO_INVALID = 0, + AMDXDNA_BO_SHMEM, + AMDXDNA_BO_DEV_HEAP, + AMDXDNA_BO_DEV, + AMDXDNA_BO_CMD, +}; + +/** + * struct amdxdna_drm_create_bo - Create a buffer object. + * @flags: Buffer flags. MBZ. + * @vaddr: User VA of buffer if applied. MBZ. + * @size: Size in bytes. + * @type: Buffer type. + * @handle: Returned DRM buffer object handle. + */ +struct amdxdna_drm_create_bo { + __u64 flags; + __u64 vaddr; + __u64 size; + __u32 type; + __u32 handle; +}; + +/** + * struct amdxdna_drm_get_bo_info - Get buffer object information. + * @ext: MBZ. + * @ext_flags: MBZ. + * @handle: DRM buffer object handle. + * @pad: MBZ. + * @map_offset: Returned DRM fake offset for mmap(). + * @vaddr: Returned user VA of buffer. 0 in case user needs mmap(). + * @xdna_addr: Returned XDNA device virtual address. + */ +struct amdxdna_drm_get_bo_info { + __u64 ext; + __u64 ext_flags; + __u32 handle; + __u32 pad; + __u64 map_offset; + __u64 vaddr; + __u64 xdna_addr; +}; + +/** + * struct amdxdna_drm_sync_bo - Sync buffer object. + * @handle: Buffer object handle. + * @direction: Direction of sync, can be from device or to device. + * @offset: Offset in the buffer to sync. + * @size: Size in bytes. + */ +struct amdxdna_drm_sync_bo { + __u32 handle; +#define SYNC_DIRECT_TO_DEVICE 0U +#define SYNC_DIRECT_FROM_DEVICE 1U + __u32 direction; + __u64 offset; + __u64 size; +}; + +enum amdxdna_cmd_type { + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, + AMDXDNA_CMD_SUBMIT_DEPENDENCY, + AMDXDNA_CMD_SUBMIT_SIGNAL, +}; + +/** + * struct amdxdna_drm_exec_cmd - Execute command. + * @ext: MBZ. + * @ext_flags: MBZ. + * @hwctx: Hardware context handle. + * @type: One of command type in enum amdxdna_cmd_type. + * @cmd_handles: Array of command handles or the command handle itself + * in case of just one. + * @args: Array of arguments for all command handles. + * @cmd_count: Number of command handles in the cmd_handles array. + * @arg_count: Number of arguments in the args array. + * @seq: Returned sequence number for this command. + */ +struct amdxdna_drm_exec_cmd { + __u64 ext; + __u64 ext_flags; + __u32 hwctx; + __u32 type; + __u64 cmd_handles; + __u64 args; + __u32 cmd_count; + __u32 arg_count; + __u64 seq; +}; + +/** + * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware + * @buffer: The user space buffer that will return the AIE status. + * @buffer_size: The size of the user space buffer. + * @cols_filled: A bitmap of AIE columns whose data has been returned in the buffer. + */ +struct amdxdna_drm_query_aie_status { + __u64 buffer; /* out */ + __u32 buffer_size; /* in */ + __u32 cols_filled; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_version - Query the version of the AIE hardware + * @major: The major version number. + * @minor: The minor version number. + */ +struct amdxdna_drm_query_aie_version { + __u32 major; /* out */ + __u32 minor; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_tile_metadata - Query the metadata of AIE tile (core, mem, shim) + * @row_count: The number of rows. 
+ * @row_start: The starting row number. + * @dma_channel_count: The number of dma channels. + * @lock_count: The number of locks. + * @event_reg_count: The number of events. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_aie_tile_metadata { + __u16 row_count; + __u16 row_start; + __u16 dma_channel_count; + __u16 lock_count; + __u16 event_reg_count; + __u16 pad[3]; +}; + +/** + * struct amdxdna_drm_query_aie_metadata - Query the metadata of the AIE hardware + * @col_size: The size of a column in bytes. + * @cols: The total number of columns. + * @rows: The total number of rows. + * @version: The version of the AIE hardware. + * @core: The metadata for all core tiles. + * @mem: The metadata for all mem tiles. + * @shim: The metadata for all shim tiles. + */ +struct amdxdna_drm_query_aie_metadata { + __u32 col_size; + __u16 cols; + __u16 rows; + struct amdxdna_drm_query_aie_version version; + struct amdxdna_drm_query_aie_tile_metadata core; + struct amdxdna_drm_query_aie_tile_metadata mem; + struct amdxdna_drm_query_aie_tile_metadata shim; +}; + +/** + * struct amdxdna_drm_query_clock - Metadata for a clock + * @name: The clock name. + * @freq_mhz: The clock frequency. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_clock { + __u8 name[16]; + __u32 freq_mhz; + __u32 pad; +}; + +/** + * struct amdxdna_drm_query_clock_metadata - Query metadata for clocks + * @mp_npu_clock: The metadata for MP-NPU clock. + * @h_clock: The metadata for H clock. + */ +struct amdxdna_drm_query_clock_metadata { + struct amdxdna_drm_query_clock mp_npu_clock; + struct amdxdna_drm_query_clock h_clock; +}; + +enum amdxdna_sensor_type { + AMDXDNA_SENSOR_TYPE_POWER +}; + +/** + * struct amdxdna_drm_query_sensor - The data for single sensor. + * @label: The name for a sensor. + * @input: The current value of the sensor. + * @max: The maximum value possible for the sensor. + * @average: The average value of the sensor. + * @highest: The highest recorded sensor value for this driver load for the sensor. + * @status: The sensor status. + * @units: The sensor units. + * @unitm: Translates value member variables into the correct unit via (pow(10, unitm) * value). + * @type: The sensor type from enum amdxdna_sensor_type. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_sensor { + __u8 label[64]; + __u32 input; + __u32 max; + __u32 average; + __u32 highest; + __u8 status[64]; + __u8 units[16]; + __s8 unitm; + __u8 type; + __u8 pad[6]; +}; + +/** + * struct amdxdna_drm_query_hwctx - The data for single context. + * @context_id: The ID for this context. + * @start_col: The starting column for the partition assigned to this context. + * @num_col: The number of columns in the partition assigned to this context. + * @pad: Structure padding. + * @pid: The Process ID of the process that created this context. + * @command_submissions: The number of commands submitted to this context. + * @command_completions: The number of commands completed by this context. + * @migrations: The number of times this context has been moved to a different partition. + * @preemptions: The number of times this context has been preempted by another context in the + * same partition. + * @errors: The errors for this context. 
+ */ +struct amdxdna_drm_query_hwctx { + __u32 context_id; + __u32 start_col; + __u32 num_col; + __u32 pad; + __s64 pid; + __u64 command_submissions; + __u64 command_completions; + __u64 migrations; + __u64 preemptions; + __u64 errors; +}; + +enum amdxdna_power_mode_type { + POWER_MODE_DEFAULT, /* Fallback to calculated DPM */ + POWER_MODE_LOW, /* Set frequency to lowest DPM */ + POWER_MODE_MEDIUM, /* Set frequency to medium DPM */ + POWER_MODE_HIGH, /* Set frequency to highest DPM */ + POWER_MODE_TURBO, /* Maximum power */ +}; + +/** + * struct amdxdna_drm_get_power_mode - Get the configured power mode + * @power_mode: The mode type from enum amdxdna_power_mode_type + * @pad: Structure padding. + */ +struct amdxdna_drm_get_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +/** + * struct amdxdna_drm_query_firmware_version - Query the firmware version + * @major: The major version number + * @minor: The minor version number + * @patch: The patch level version number + * @build: The build ID + */ +struct amdxdna_drm_query_firmware_version { + __u32 major; /* out */ + __u32 minor; /* out */ + __u32 patch; /* out */ + __u32 build; /* out */ +}; + +enum amdxdna_drm_get_param { + DRM_AMDXDNA_QUERY_AIE_STATUS, + DRM_AMDXDNA_QUERY_AIE_METADATA, + DRM_AMDXDNA_QUERY_AIE_VERSION, + DRM_AMDXDNA_QUERY_CLOCK_METADATA, + DRM_AMDXDNA_QUERY_SENSORS, + DRM_AMDXDNA_QUERY_HW_CONTEXTS, + DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, + DRM_AMDXDNA_GET_POWER_MODE, +}; + +/** + * struct amdxdna_drm_get_info - Get some information from the AIE hardware. + * @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed in the buffer. + * @buffer_size: Size of the input buffer. Size needed/written by the kernel. + * @buffer: A structure specified by the param struct member. + */ +struct amdxdna_drm_get_info { + __u32 param; /* in */ + __u32 buffer_size; /* in/out */ + __u64 buffer; /* in/out */ +}; + +enum amdxdna_drm_set_param { + DRM_AMDXDNA_SET_POWER_MODE, + DRM_AMDXDNA_WRITE_AIE_MEM, + DRM_AMDXDNA_WRITE_AIE_REG, +}; + +/** + * struct amdxdna_drm_set_state - Set the state of the AIE hardware. + * @param: Value in enum amdxdna_drm_set_param. + * @buffer_size: Size of the input param. + * @buffer: Pointer to the input param. + */ +struct amdxdna_drm_set_state { + __u32 param; /* in */ + __u32 buffer_size; /* in */ + __u64 buffer; /* in */ +}; + +/** + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware + * @power_mode: The sensor type from enum amdxdna_power_mode_type + * @pad: MBZ. 
+ */ +struct amdxdna_drm_set_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ + struct amdxdna_drm_create_hwctx) + +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \ + struct amdxdna_drm_destroy_hwctx) + +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \ + struct amdxdna_drm_config_hwctx) + +#define DRM_IOCTL_AMDXDNA_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_BO, \ + struct amdxdna_drm_create_bo) + +#define DRM_IOCTL_AMDXDNA_GET_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_BO_INFO, \ + struct amdxdna_drm_get_bo_info) + +#define DRM_IOCTL_AMDXDNA_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ + struct amdxdna_drm_sync_bo) + +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ + struct amdxdna_drm_exec_cmd) + +#define DRM_IOCTL_AMDXDNA_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \ + struct amdxdna_drm_get_info) + +#define DRM_IOCTL_AMDXDNA_SET_STATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \ + struct amdxdna_drm_set_state) + +#if defined(__cplusplus) +} /* extern c end */ +#endif + +#endif /* _UAPI_AMDXDNA_ACCEL_H_ */ diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 16122819edfe..7fba37b94401 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1295,16 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 84d502e42961..e41a3cec6a9e 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -421,6 +421,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ARM 0x08 #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a +#define DRM_FORMAT_MOD_VENDOR_MTK 0x0b /* add more to the end as needed */ @@ -703,6 +704,31 @@ extern "C" { #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) /* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 
20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + +/* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * * Macroblocks are laid in a Z-shape, and each pixel data is following the @@ -1428,6 +1454,46 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) */ #define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0) +/* MediaTek modifiers + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * 7: 0 TILE LAYOUT Values are MTK_FMT_MOD_TILE_* + * 15: 8 COMPRESSION Values are MTK_FMT_MOD_COMPRESS_* + * 23:16 10 BIT LAYOUT Values are MTK_FMT_MOD_10BIT_LAYOUT_* + * + */ + +#define DRM_FORMAT_MOD_MTK(__flags) fourcc_mod_code(MTK, __flags) + +/* + * MediaTek Tiled Modifier + * The lowest 8 bits of the modifier is used to specify the tiling + * layout. Only the 16L_32S tiling is used for now, but we define an + * "untiled" version and leave room for future expansion. + */ +#define MTK_FMT_MOD_TILE_MASK 0xf +#define MTK_FMT_MOD_TILE_NONE 0x0 +#define MTK_FMT_MOD_TILE_16L32S 0x1 + +/* + * Bits 8-15 specify compression options + */ +#define MTK_FMT_MOD_COMPRESS_MASK (0xf << 8) +#define MTK_FMT_MOD_COMPRESS_NONE (0x0 << 8) +#define MTK_FMT_MOD_COMPRESS_V1 (0x1 << 8) + +/* + * Bits 16-23 specify how the bits of 10 bit formats are + * stored out in memory + */ +#define MTK_FMT_MOD_10BIT_LAYOUT_MASK (0xf << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_PACKED (0x0 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBTILED (0x1 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBRASTER (0x2 << 16) + +/* alias for the most common tiling format */ +#define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) + /* * AMD modifiers * @@ -1476,6 +1542,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_VER_GFX10 2 #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 #define AMD_FMT_MOD_TILE_VER_GFX11 4 +#define AMD_FMT_MOD_TILE_VER_GFX12 5 /* * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical @@ -1486,13 +1553,31 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has * GFX9 as canonical version. + * + * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. 
*/ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10 +#define AMD_FMT_MOD_TILE_GFX9_4K_D_X 22 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 #define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31 +/* Gfx12 swizzle modes: + * 0 - LINEAR + * 1 - 256B_2D - 2D block dimensions + * 2 - 4KB_2D + * 3 - 64KB_2D + * 4 - 256KB_2D + * 5 - 4KB_3D - 3D block dimensions + * 6 - 64KB_3D + * 7 - 256KB_3D + */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 +#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 + #define AMD_FMT_MOD_DCC_BLOCK_64B 0 #define AMD_FMT_MOD_DCC_BLOCK_128B 1 #define AMD_FMT_MOD_DCC_BLOCK_256B 2 diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 7040e7ea80c7..c082810c08a8 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -846,14 +846,6 @@ struct drm_color_ctm { __u64 matrix[9]; }; -struct drm_color_ctm_3x4 { - /* - * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude - * (not two's complement!) format. - */ - __u64 matrix[12]; -}; - struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and @@ -866,6 +858,19 @@ struct drm_color_lut { }; /** + * struct drm_plane_size_hint - Plane size hints + * @width: The width of the plane in pixel + * @height: The height of the plane in pixel + * + * The plane SIZE_HINTS property blob contains an + * array of struct drm_plane_size_hint. + */ +struct drm_plane_size_hint { + __u16 width; + __u16 height; +}; + +/** * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data. * * HDR Metadata Infoframe as per CTA 861.G spec. This is expected diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index d87410a8443a..af024d90453d 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -77,11 +77,6 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c #define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d #define ETNAVIV_PARAM_GPU_ECO_ID 0x1e -#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f -#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20 -#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21 -#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22 -#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23 #define ETNA_MAX_PIPES 4 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2ee338860b7e..535cb68fdb5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PXP_STATUS 58 +/* + * Query if kernel allows marking a context to send a Freq hint to SLPC. This + * will enable use of the strategies allowed by the SLPC algorithm. + */ +#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59 + /* Must be kept compact -- no holes and well documented */ /** @@ -2148,6 +2154,24 @@ struct drm_i915_gem_context_param { * -EIO: The firmware did not succeed in creating the protected context. */ #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd + +/* + * I915_CONTEXT_PARAM_LOW_LATENCY: + * + * Mark this context as a low latency workload which requires aggressive GT + * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel + * supports this per context flag. + */ +#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. 
+ */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2549,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. @@ -2623,19 +2665,29 @@ struct drm_i915_reg_read { * */ +/* + * struct drm_i915_reset_stats - Return global reset and other context stats + * + * Driver keeps few stats for each contexts and also global reset count. + * This struct can be used to query those stats. + */ struct drm_i915_reset_stats { + /** @ctx_id: ID of the requested context */ __u32 ctx_id; + + /** @flags: MBZ */ __u32 flags; - /* All resets since boot/module reload, for all contexts */ + /** @reset_count: All resets since boot/module reload, for all contexts */ __u32 reset_count; - /* Number of batches lost when active in GPU, for this context */ + /** @batch_active: Number of batches lost when active in GPU, for this context */ __u32 batch_active; - /* Number of batches lost pending for execution, for this context */ + /** @batch_pending: Number of batches lost pending for execution, for this context */ __u32 batch_pending; + /** @pad: MBZ */ __u32 pad; }; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 19a13468eca5..746c43bd3eb6 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -12,15 +12,19 @@ extern "C" { #endif -#define DRM_IVPU_DRIVER_MAJOR 1 -#define DRM_IVPU_DRIVER_MINOR 0 - #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 #define DRM_IVPU_BO_INFO 0x03 #define DRM_IVPU_SUBMIT 0x05 #define DRM_IVPU_BO_WAIT 0x06 +#define DRM_IVPU_METRIC_STREAMER_START 0x07 +#define DRM_IVPU_METRIC_STREAMER_STOP 0x08 +#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09 +#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a +#define DRM_IVPU_CMDQ_CREATE 0x0b +#define DRM_IVPU_CMDQ_DESTROY 0x0c +#define DRM_IVPU_CMDQ_SUBMIT 0x0d #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -40,6 +44,31 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_WAIT \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) +#define DRM_IOCTL_IVPU_METRIC_STREAMER_START \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START, \ + struct drm_ivpu_metric_streamer_start) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP, \ + struct drm_ivpu_metric_streamer_stop) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA, \ + struct 
drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, \ + struct drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_CMDQ_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_CREATE, struct drm_ivpu_cmdq_create) + +#define DRM_IOCTL_IVPU_CMDQ_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_DESTROY, struct drm_ivpu_cmdq_destroy) + +#define DRM_IOCTL_IVPU_CMDQ_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_SUBMIT, struct drm_ivpu_cmdq_submit) + /** * DOC: contexts * @@ -90,6 +119,13 @@ extern "C" { * accessible by hardware DMA. */ #define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 +/** + * DRM_IVPU_CAP_MANAGE_CMDQ + * + * Driver supports explicit command queue operations like command queue create, + * command queue destroy and submit job on specific command queue. + */ +#define DRM_IVPU_CAP_MANAGE_CMDQ 3 /** * struct drm_ivpu_param - Get/Set VPU parameters @@ -241,7 +277,7 @@ struct drm_ivpu_bo_info { /* drm_ivpu_submit engines */ #define DRM_IVPU_ENGINE_COMPUTE 0 -#define DRM_IVPU_ENGINE_COPY 1 +#define DRM_IVPU_ENGINE_COPY 1 /* Deprecated */ /** * struct drm_ivpu_submit - Submit commands to the VPU @@ -272,10 +308,6 @@ struct drm_ivpu_submit { * %DRM_IVPU_ENGINE_COMPUTE: * * Performs Deep Learning Neural Compute Inference Operations - * - * %DRM_IVPU_ENGINE_COPY: - * - * Performs memory copy operations to/from system memory allocated for VPU */ __u32 engine; @@ -303,6 +335,44 @@ struct drm_ivpu_submit { __u32 priority; }; +/** + * struct drm_ivpu_cmdq_submit - Submit commands to the VPU using explicit command queue + * + * Execute a single command buffer on a given command queue. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_cmdq_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this job. + * The number of elements in the array must be equal to the value given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain all + * BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** @cmdq_id: ID for the command queue where job will be submitted */ + __u32 cmdq_id; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + /* drm_ivpu_bo_wait job status codes */ #define DRM_IVPU_JOB_STATUS_SUCCESS 0 #define DRM_IVPU_JOB_STATUS_ABORTED 256 @@ -336,6 +406,80 @@ struct drm_ivpu_bo_wait { __u32 pad; }; +/** + * struct drm_ivpu_metric_streamer_start - Start collecting metric data + */ +struct drm_ivpu_metric_streamer_start { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @sampling_period_ns: Sampling period in nanoseconds */ + __u64 sampling_period_ns; + /** + * @read_period_samples: + * + * Number of samples after which user space will try to read the data. + * Reading the data after significantly longer period may cause data loss. 
+ */ + __u32 read_period_samples; + /** @sample_size: Returned size of a single sample in bytes */ + __u32 sample_size; + /** @max_data_size: Returned max @data_size from %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */ + __u32 max_data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data + */ +struct drm_ivpu_metric_streamer_get_data { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @buffer_ptr: A pointer to a destination for the copied data */ + __u64 buffer_ptr; + /** @buffer_size: Size of the destination buffer */ + __u64 buffer_size; + /** + * @data_size: Returned size of copied metric data + * + * If the @buffer_size is zero, returns the amount of data ready to be copied. + */ + __u64 data_size; +}; + +/** + * struct drm_ivpu_cmdq_create - Create command queue for job submission + */ +struct drm_ivpu_cmdq_create { + /** @cmdq_id: Returned ID of created command queue */ + __u32 cmdq_id; + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the following: + * %DRM_IVPU_JOB_PRIORITY_DEFAULT + * %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; +}; + +/** + * struct drm_ivpu_cmdq_destroy - Destroy a command queue + */ +struct drm_ivpu_cmdq_destroy { + /** @cmdq_id: ID of command queue to destroy */ + __u32 cmdq_id; +}; + +/** + * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data + */ +struct drm_ivpu_metric_streamer_stop { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index d8a6b3472760..2342cb90857e 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -87,6 +87,10 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */ #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ +#define MSM_PARAM_RAYTRACING 0x11 /* RO */ +#define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ +#define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ +#define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # @@ -344,7 +348,10 @@ struct drm_msm_gem_madvise { * backwards compatibility as a "default" submitqueue */ -#define MSM_SUBMITQUEUE_FLAGS (0) +#define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_FLAGS ( \ + MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + 0) /* * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index cd84227f1b42..dd87f8f30793 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -68,11 +68,28 @@ extern "C" { */ #define NOUVEAU_GETPARAM_VRAM_USED 19 +/* + * NOUVEAU_GETPARAM_HAS_VMA_TILEMODE + * + * Query whether tile mode and PTE kind are accepted with VM allocs or not. + */ +#define NOUVEAU_GETPARAM_HAS_VMA_TILEMODE 20 + struct drm_nouveau_getparam { __u64 param; __u64 value; }; +/* + * Those are used to support selecting the main engine used on Kepler. 
+ * This goes into drm_nouveau_channel_alloc::tt_ctxdma_handle + */ +#define NOUVEAU_FIFO_ENGINE_GR 0x01 +#define NOUVEAU_FIFO_ENGINE_VP 0x02 +#define NOUVEAU_FIFO_ENGINE_PPP 0x04 +#define NOUVEAU_FIFO_ENGINE_BSP 0x08 +#define NOUVEAU_FIFO_ENGINE_CE 0x30 + struct drm_nouveau_channel_alloc { __u32 fb_ctxdma_handle; __u32 tt_ctxdma_handle; @@ -95,6 +112,18 @@ struct drm_nouveau_channel_free { __s32 channel; }; +struct drm_nouveau_notifierobj_alloc { + __u32 channel; + __u32 handle; + __u32 size; + __u32 offset; +}; + +struct drm_nouveau_gpuobj_free { + __s32 channel; + __u32 handle; +}; + #define NOUVEAU_GEM_DOMAIN_CPU (1 << 0) #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 9f231d40a146..568724be6628 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -40,6 +40,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump) #define PANFROST_JD_REQ_FS (1 << 0) +#define PANFROST_JD_REQ_CYCLE_COUNT (1 << 1) /** * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D * engine. @@ -172,6 +173,8 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_NR_CORE_GROUPS, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, DRM_PANFROST_PARAM_AFBC_FEATURES, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, }; struct drm_panfrost_get_param { diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h new file mode 100644 index 000000000000..97e2c4510e69 --- /dev/null +++ b/include/uapi/drm/panthor_drm.h @@ -0,0 +1,1028 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (C) 2023 Collabora ltd. */ +#ifndef _PANTHOR_DRM_H_ +#define _PANTHOR_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: Introduction + * + * This documentation describes the Panthor IOCTLs. + * + * Just a few generic rules about the data passed to the Panthor IOCTLs: + * + * - Structures must be aligned on 64-bit/8-byte. If the object is not + * naturally aligned, a padding field must be added. + * - Fields must be explicitly aligned to their natural type alignment with + * pad[0..N] fields. + * - All padding fields will be checked by the driver to make sure they are + * zeroed. + * - Flags can be added, but not removed/replaced. + * - New fields can be added to the main structures (the structures + * directly passed to the ioctl). Those fields can be added at the end of + * the structure, or replace existing padding fields. Any new field being + * added must preserve the behavior that existed before those fields were + * added when a value of zero is passed. + * - New fields can be added to indirect objects (objects pointed by the + * main structure), iff those objects are passed a size to reflect the + * size known by the userspace driver (see drm_panthor_obj_array::stride + * or drm_panthor_dev_query::size). + * - If the kernel driver is too old to know some fields, those will be + * ignored if zero, and otherwise rejected (and so will be zero on output). + * - If userspace is too old to know some fields, those will be zeroed + * (input) before the structure is parsed by the kernel driver. + * - Each new flag/field addition must come with a driver version update so + * the userspace driver doesn't have to trial and error to know which + * flags are supported. 
+ * - Structures should not contain unions, as this would defeat the + * extensibility of such structures. + * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed + * at the end of the drm_panthor_ioctl_id enum. + */ + +/** + * DOC: MMIO regions exposed to userspace. + * + * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET + * + * File offset for all MMIO regions being exposed to userspace. Don't use + * this value directly, use DRM_PANTHOR_USER_<name>_OFFSET values instead. + * pgoffset passed to mmap2() is an unsigned long, which forces us to use a + * different offset on 32-bit and 64-bit systems. + * + * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET + * + * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls + * GPU cache flushing through CS instructions, but the flush reduction + * mechanism requires a flush_id. This flush_id could be queried with an + * ioctl, but Arm provides a well-isolated register page containing only this + * read-only register, so let's expose this page through a static mmap offset + * and allow direct mapping of this MMIO region so we can avoid the + * user <-> kernel round-trip. + */ +#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) +#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) +#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ + DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ + DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) +#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) + +/** + * DOC: IOCTL IDs + * + * enum drm_panthor_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx + * definitions instead. + */ +enum drm_panthor_ioctl_id { + /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ + DRM_PANTHOR_DEV_QUERY = 0, + + /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ + DRM_PANTHOR_VM_CREATE, + + /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ + DRM_PANTHOR_VM_DESTROY, + + /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ + DRM_PANTHOR_VM_BIND, + + /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ + DRM_PANTHOR_VM_GET_STATE, + + /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ + DRM_PANTHOR_BO_CREATE, + + /** + * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to + * mmap to map a GEM object. + */ + DRM_PANTHOR_BO_MMAP_OFFSET, + + /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ + DRM_PANTHOR_GROUP_CREATE, + + /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ + DRM_PANTHOR_GROUP_DESTROY, + + /** + * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging + * to a specific scheduling group. + */ + DRM_PANTHOR_GROUP_SUBMIT, + + /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ + DRM_PANTHOR_GROUP_GET_STATE, + + /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_CREATE, + + /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_DESTROY, +}; + +/** + * DOC: IOCTL arguments + */ + +/** + * struct drm_panthor_obj_array - Object array. + * + * This object is used to pass an array of objects whose size is subject to changes in + * future versions of the driver. In order to support this mutability, we pass a stride + * describing the size of the object as known by userspace. + * + * You shouldn't fill drm_panthor_obj_array fields directly. 
You should instead use + * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to + * the object size. + */ +struct drm_panthor_obj_array { + /** @stride: Stride of object struct. Used for versioning. */ + __u32 stride; + + /** @count: Number of objects in the array. */ + __u32 count; + + /** @array: User pointer to an array of objects. */ + __u64 array; +}; + +/** + * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. + * @cnt: Number of elements in the array. + * @ptr: Pointer to the array to pass to the kernel. + * + * Macro initializing a drm_panthor_obj_array based on the object size as known + * by userspace. + */ +#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ + { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } + +/** + * enum drm_panthor_sync_op_flags - Synchronization operation flags. + */ +enum drm_panthor_sync_op_flags { + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, + + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, + + /** + * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization + * object type. + */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, + + /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ + DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, + + /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ + DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), +}; + +/** + * struct drm_panthor_sync_op - Synchronization operation. + */ +struct drm_panthor_sync_op { + /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */ + __u32 flags; + + /** @handle: Sync handle. */ + __u32 handle; + + /** + * @timeline_value: MBZ if + * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != + * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; +}; + +/** + * enum drm_panthor_dev_query_type - Query type + * + * Place new types at the end, don't re-order, don't remove or replace. + */ +enum drm_panthor_dev_query_type { + /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ + DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, + + /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ + DRM_PANTHOR_DEV_QUERY_CSIF_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */ + DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, + + /** + * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. + */ + DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, +}; + +/** + * struct drm_panthor_gpu_info - GPU information + * + * Structure grouping all queryable information relating to the GPU. + */ +struct drm_panthor_gpu_info { + /** @gpu_id : GPU ID. */ + __u32 gpu_id; +#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) +#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) +#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) +#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) +#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) +#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) + + /** @gpu_rev: GPU revision. */ + __u32 gpu_rev; + + /** @csf_id: Command stream frontend ID. 
*/ + __u32 csf_id; +#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) +#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) +#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) +#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) +#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) + + /** @l2_features: L2-cache features. */ + __u32 l2_features; + + /** @tiler_features: Tiler features. */ + __u32 tiler_features; + + /** @mem_features: Memory features. */ + __u32 mem_features; + + /** @mmu_features: MMU features. */ + __u32 mmu_features; +#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) + + /** @thread_features: Thread features. */ + __u32 thread_features; + + /** @max_threads: Maximum number of threads. */ + __u32 max_threads; + + /** @thread_max_workgroup_size: Maximum workgroup size. */ + __u32 thread_max_workgroup_size; + + /** + * @thread_max_barrier_size: Maximum number of threads that can wait + * simultaneously on a barrier. + */ + __u32 thread_max_barrier_size; + + /** @coherency_features: Coherency features. */ + __u32 coherency_features; + + /** @texture_features: Texture features. */ + __u32 texture_features[4]; + + /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ + __u32 as_present; + + /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ + __u64 shader_present; + + /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ + __u64 l2_present; + + /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ + __u64 tiler_present; + + /** @core_features: Used to discriminate core variants when they exist. */ + __u32 core_features; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_csif_info - Command stream interface information + * + * Structure grouping all queryable information relating to the command stream interface. + */ +struct drm_panthor_csif_info { + /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ + __u32 csg_slot_count; + + /** @cs_slot_count: Number of command stream slots per group. */ + __u32 cs_slot_count; + + /** @cs_reg_count: Number of command stream registers. */ + __u32 cs_reg_count; + + /** @scoreboard_slot_count: Number of scoreboard slots. */ + __u32 scoreboard_slot_count; + + /** + * @unpreserved_cs_reg_count: Number of command stream registers reserved by + * the kernel driver to call a userspace command stream. + * + * All registers can be used by a userspace command stream, but the + * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are + * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. + */ + __u32 unpreserved_cs_reg_count; + + /** + * @pad: Padding field, set to zero. + */ + __u32 pad; +}; + +/** + * struct drm_panthor_timestamp_info - Timestamp information + * + * Structure grouping all queryable information relating to the GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /** + * @timestamp_frequency: The frequency of the timestamp timer or 0 if + * unknown. + */ + __u64 timestamp_frequency; + + /** @current_timestamp: The current timestamp. */ + __u64 current_timestamp; + + /** @timestamp_offset: The offset of the timestamp timer. */ + __u64 timestamp_offset; +}; + +/** + * struct drm_panthor_group_priorities_info - Group priorities information + * + * Structure grouping all queryable information relating to the allowed group priorities. 
+ */ +struct drm_panthor_group_priorities_info { + /** + * @allowed_mask: Bitmask of the allowed group priorities. + * + * Each bit represents a variant of the enum drm_panthor_group_priority. + */ + __u8 allowed_mask; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; +}; + +/** + * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY + */ +struct drm_panthor_dev_query { + /** @type: the query type (see drm_panthor_dev_query_type). */ + __u32 type; + + /** + * @size: size of the type being queried. + * + * If pointer is NULL, size is updated by the driver to provide the + * output structure size. If pointer is not NULL, the driver will + * only copy min(size, actual_structure_size) bytes to the pointer, + * and update the size accordingly. This allows us to extend query + * types without breaking userspace. + */ + __u32 size; + + /** + * @pointer: user pointer to a query type struct. + * + * Pointer can be NULL, in which case, nothing is copied, but the + * actual structure size is returned. If not NULL, it must point to + * a location that's large enough to hold size bytes. + */ + __u64 pointer; +}; + +/** + * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE + */ +struct drm_panthor_vm_create { + /** @flags: VM flags, MBZ. */ + __u32 flags; + + /** @id: Returned VM ID. */ + __u32 id; + + /** + * @user_va_range: Size of the VA space reserved for user objects. + * + * The kernel will pick the remaining space to map kernel-only objects to the + * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, + * ...). If the space left for kernel objects is too small, kernel object + * allocation will fail further down the road. One can use + * drm_panthor_gpu_info::mmu_features to extract the total virtual address + * range, and chose a user_va_range that leaves some space to the kernel. + * + * If user_va_range is zero, the kernel will pick a sensible value based on + * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user + * split should leave enough VA space for userspace processes to support SVM, + * while still allowing the kernel to map some amount of kernel objects in + * the kernel VA range). The value chosen by the driver will be returned in + * @user_va_range. + * + * User VA space always starts at 0x0, kernel VA space is always placed after + * the user VA range. + */ + __u64 user_va_range; +}; + +/** + * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY + */ +struct drm_panthor_vm_destroy { + /** @id: ID of the VM to destroy. */ + __u32 id; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * enum drm_panthor_vm_bind_op_flags - VM bind operation flags + */ +enum drm_panthor_vm_bind_op_flags { + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. 
*/ + DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ + DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. + * + * Just serves as a synchronization point on a VM queue. + * + * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, + * and drm_panthor_vm_bind_op::syncs contains at least one element. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, +}; + +/** + * struct drm_panthor_vm_bind_op - VM bind operation + */ +struct drm_panthor_vm_bind_op { + /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ + __u32 flags; + + /** + * @bo_handle: Handle of the buffer object to map. + * MBZ for unmap or sync-only operations. + */ + __u32 bo_handle; + + /** + * @bo_offset: Buffer object offset. + * MBZ for unmap or sync-only operations. + */ + __u64 bo_offset; + + /** + * @va: Virtual address to map/unmap. + * MBZ for sync-only operations. + */ + __u64 va; + + /** + * @size: Size to map/unmap. + * MBZ for sync-only operations. + */ + __u64 size; + + /** + * @syncs: Array of struct drm_panthor_sync_op synchronization + * operations. + * + * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on + * the drm_panthor_vm_bind object containing this VM bind operation. + * + * This array shall not be empty for sync-only operations. + */ + struct drm_panthor_obj_array syncs; + +}; + +/** + * enum drm_panthor_vm_bind_flags - VM bind flags + */ +enum drm_panthor_vm_bind_flags { + /** + * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM + * queue instead of being executed synchronously. + */ + DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, +}; + +/** + * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND + */ +struct drm_panthor_vm_bind { + /** @vm_id: VM targeted by the bind request. */ + __u32 vm_id; + + /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ + __u32 flags; + + /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ + struct drm_panthor_obj_array ops; +}; + +/** + * enum drm_panthor_vm_state - VM states. + */ +enum drm_panthor_vm_state { + /** + * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. + * + * New VM operations will be accepted on this VM. + */ + DRM_PANTHOR_VM_STATE_USABLE, + + /** + * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. + * + * Something put the VM in an unusable state (like an asynchronous + * VM_BIND request failing for any reason). + * + * Once the VM is in this state, all new MAP operations will be + * rejected, and any GPU job targeting this VM will fail. + * UNMAP operations are still accepted. + * + * The only way to recover from an unusable VM is to create a new + * VM, and destroy the old one. + */ + DRM_PANTHOR_VM_STATE_UNUSABLE, +}; + +/** + * struct drm_panthor_vm_get_state - Get VM state. + */ +struct drm_panthor_vm_get_state { + /** @vm_id: VM targeted by the get_state request. */ + __u32 vm_id; + + /** + * @state: state returned by the driver. + * + * Must be one of the enum drm_panthor_vm_state values. + */ + __u32 state; +}; + +/** + * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. + */ +enum drm_panthor_bo_flags { + /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ + DRM_PANTHOR_BO_NO_MMAP = (1 << 0), +}; + +/** + * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. 
+ */ +struct drm_panthor_bo_create { + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. + */ + __u32 flags; + + /** + * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. + * + * If not zero, the field must refer to a valid VM ID, and implies that: + * - the buffer object will only ever be bound to that VM + * - cannot be exported as a PRIME fd + */ + __u32 exclusive_vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. + */ +struct drm_panthor_bo_mmap_offset { + /** @handle: Handle of the object we want an mmap offset for. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @offset: The fake offset to use for subsequent mmap calls. */ + __u64 offset; +}; + +/** + * struct drm_panthor_queue_create - Queue creation arguments. + */ +struct drm_panthor_queue_create { + /** + * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, + * 15 being the highest priority. + */ + __u8 priority; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; + + /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ + __u32 ringbuf_size; +}; + +/** + * enum drm_panthor_group_priority - Scheduling group priority + */ +enum drm_panthor_group_priority { + /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ + PANTHOR_GROUP_PRIORITY_LOW = 0, + + /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ + PANTHOR_GROUP_PRIORITY_MEDIUM, + + /** + * @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_HIGH, + + /** + * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_REALTIME, +}; + +/** + * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE + */ +struct drm_panthor_group_create { + /** @queues: Array of drm_panthor_queue_create elements. */ + struct drm_panthor_obj_array queues; + + /** + * @max_compute_cores: Maximum number of cores that can be used by compute + * jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @compute_core_mask. + */ + __u8 max_compute_cores; + + /** + * @max_fragment_cores: Maximum number of cores that can be used by fragment + * jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @fragment_core_mask. + */ + __u8 max_fragment_cores; + + /** + * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs + * across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @tiler_core_mask. + */ + __u8 max_tiler_cores; + + /** @priority: Group priority (see enum drm_panthor_group_priority). */ + __u8 priority; + + /** @pad: Padding field, MBZ. */ + __u32 pad; + + /** + * @compute_core_mask: Mask encoding cores that can be used for compute jobs. + * + * This field must have at least @max_compute_cores bits set. + * + * The bits set here should also be set in drm_panthor_gpu_info::shader_present. + */ + __u64 compute_core_mask; + + /** + * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. 
+ *
+ * This field must have at least @max_fragment_cores bits set.
+ *
+ * The bits set here should also be set in drm_panthor_gpu_info::shader_present.
+ */
+ __u64 fragment_core_mask;
+
+ /**
+ * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs.
+ *
+ * This field must have at least @max_tiler_cores bits set.
+ *
+ * The bits set here should also be set in drm_panthor_gpu_info::tiler_present.
+ */
+ __u64 tiler_core_mask;
+
+ /**
+ * @vm_id: VM ID to bind this group to.
+ *
+ * All submissions to queues bound to this group will use this VM.
+ */
+ __u32 vm_id;
+
+ /**
+ * @group_handle: Returned group handle. Passed back when submitting jobs or
+ * destroying a group.
+ */
+ __u32 group_handle;
+};
+
+/**
+ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY
+ */
+struct drm_panthor_group_destroy {
+ /** @group_handle: Group to destroy */
+ __u32 group_handle;
+
+ /** @pad: Padding field, MBZ. */
+ __u32 pad;
+};
+
+/**
+ * struct drm_panthor_queue_submit - Job submission arguments.
+ *
+ * This describes the userspace command stream to call from the kernel
+ * command stream ring-buffer. Queue submission is always part of a group
+ * submission, taking one or more jobs to submit to the underlying queues.
+ */
+struct drm_panthor_queue_submit {
+ /** @queue_index: Index of the queue inside a group. */
+ __u32 queue_index;
+
+ /**
+ * @stream_size: Size of the command stream to execute.
+ *
+ * Must be 64-bit/8-byte aligned (the size of a CS instruction).
+ *
+ * Can be zero if stream_addr is zero too.
+ *
+ * When the stream size is zero, the queue submit serves as a
+ * synchronization point.
+ */
+ __u32 stream_size;
+
+ /**
+ * @stream_addr: GPU address of the command stream to execute.
+ *
+ * Must be 64-byte aligned.
+ *
+ * Can be zero if stream_size is zero too.
+ */
+ __u64 stream_addr;
+
+ /**
+ * @latest_flush: FLUSH_ID read at the time the stream was built.
+ *
+ * This allows cache flush elimination for the automatic
+ * flush+invalidate(all) done at submission time, which is needed to
+ * ensure the GPU doesn't get garbage when reading the indirect command
+ * stream buffers. If you want the cache flush to happen
+ * unconditionally, pass a zero here.
+ *
+ * Ignored when stream_size is zero.
+ */
+ __u32 latest_flush;
+
+ /** @pad: MBZ. */
+ __u32 pad;
+
+ /** @syncs: Array of struct drm_panthor_sync_op sync operations. */
+ struct drm_panthor_obj_array syncs;
+};
+
+/**
+ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT
+ */
+struct drm_panthor_group_submit {
+ /** @group_handle: Handle of the group to queue jobs to. */
+ __u32 group_handle;
+
+ /** @pad: MBZ. */
+ __u32 pad;
+
+ /** @queue_submits: Array of drm_panthor_queue_submit objects. */
+ struct drm_panthor_obj_array queue_submits;
+};
+
+/**
+ * enum drm_panthor_group_state_flags - Group state flags
+ */
+enum drm_panthor_group_state_flags {
+ /**
+ * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs.
+ *
+ * When a group ends up with this flag set, no jobs can be submitted to its queues.
+ */
+ DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0,
+
+ /**
+ * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults.
+ *
+ * When a group ends up with this flag set, no jobs can be submitted to its queues.
+ */
+ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1,
+
+ /**
+ * @DRM_PANTHOR_GROUP_STATE_INNOCENT: Group was killed during a reset caused by other
+ * groups.
+ *
+ * This flag can only be set if DRM_PANTHOR_GROUP_STATE_TIMEDOUT is set and
+ * DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not.
+ */
+ DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2,
+};
+
+/**
+ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE
+ *
+ * Used to query the state of a group and decide whether a new group should be created to
+ * replace it.
+ */
+struct drm_panthor_group_get_state {
+ /** @group_handle: Handle of the group to query state on */
+ __u32 group_handle;
+
+ /**
+ * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the
+ * group state.
+ */
+ __u32 state;
+
+ /** @fatal_queues: Bitmask of queues that faced fatal faults. */
+ __u32 fatal_queues;
+
+ /** @pad: MBZ */
+ __u32 pad;
+};
+
+/**
+ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE
+ */
+struct drm_panthor_tiler_heap_create {
+ /** @vm_id: VM ID the tiler heap should be mapped to */
+ __u32 vm_id;
+
+ /** @initial_chunk_count: Initial number of chunks to allocate. Must be at least one. */
+ __u32 initial_chunk_count;
+
+ /**
+ * @chunk_size: Chunk size.
+ *
+ * Must be page-aligned and lie in the [128k:8M] range.
+ */
+ __u32 chunk_size;
+
+ /**
+ * @max_chunks: Maximum number of chunks that can be allocated.
+ *
+ * Must be at least @initial_chunk_count.
+ */
+ __u32 max_chunks;
+
+ /**
+ * @target_in_flight: Maximum number of in-flight render passes.
+ *
+ * If the heap has more than @target_in_flight tiler jobs in-flight, the FW will
+ * wait for render passes to finish before queuing new tiler jobs.
+ */
+ __u32 target_in_flight;
+
+ /** @handle: Returned heap handle. Passed back to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY. */
+ __u32 handle;
+
+ /** @tiler_heap_ctx_gpu_va: Returned GPU virtual address of the heap context. */
+ __u64 tiler_heap_ctx_gpu_va;
+
+ /**
+ * @first_heap_chunk_gpu_va: First heap chunk.
+ *
+ * The tiler heap is formed of heap chunks forming a singly-linked list. This
+ * is the first element in the list.
+ */
+ __u64 first_heap_chunk_gpu_va;
+};
+
+/**
+ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY
+ */
+struct drm_panthor_tiler_heap_destroy {
+ /**
+ * @handle: Handle of the tiler heap to destroy.
+ *
+ * Must be a valid heap handle returned by DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE.
+ */
+ __u32 handle;
+
+ /** @pad: Padding field, MBZ. */
+ __u32 pad;
+};
+
+/**
+ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
+ * @__access: Access type. Must be R, W or RW.
+ * @__id: One of the DRM_PANTHOR_xxx ids.
+ * @__type: Suffix of the type being passed to the IOCTL.
+ *
+ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx
+ * values instead.
+ *
+ * Return: An IOCTL number to be passed to ioctl() from userspace.
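+ *
+ * For instance, DRM_IOCTL_PANTHOR_DEV_QUERY (built with this macro) is
+ * typically issued twice, following the size-negotiation scheme described in
+ * struct drm_panthor_dev_query. A minimal sketch, assuming the GPU_INFO query
+ * type declared earlier in this file (error handling omitted):
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_panthor_dev_query query = {
+ *		.type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
+ *	};
+ *	void *data;
+ *
+ *	// First call: size == 0 and pointer == NULL, the kernel fills query.size.
+ *	ioctl(fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
+ *
+ *	// Second call: pass a buffer of at least query.size bytes.
+ *	data = malloc(query.size);
+ *	query.pointer = (__u64)(uintptr_t)data;
+ *	ioctl(fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);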
+ */ +#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ + DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ + struct drm_panthor_ ## __type) + +enum { + DRM_IOCTL_PANTHOR_DEV_QUERY = + DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query), + DRM_IOCTL_PANTHOR_VM_CREATE = + DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create), + DRM_IOCTL_PANTHOR_VM_DESTROY = + DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy), + DRM_IOCTL_PANTHOR_VM_BIND = + DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind), + DRM_IOCTL_PANTHOR_VM_GET_STATE = + DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state), + DRM_IOCTL_PANTHOR_BO_CREATE = + DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create), + DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET = + DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset), + DRM_IOCTL_PANTHOR_GROUP_CREATE = + DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create), + DRM_IOCTL_PANTHOR_GROUP_DESTROY = + DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy), + DRM_IOCTL_PANTHOR_GROUP_SUBMIT = + DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit), + DRM_IOCTL_PANTHOR_GROUP_GET_STATE = + DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state), + DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create), + DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANTHOR_DRM_H_ */ diff --git a/include/uapi/drm/qaic_accel.h b/include/uapi/drm/qaic_accel.h index d3ca876a08e9..c92d0309d583 100644 --- a/include/uapi/drm/qaic_accel.h +++ b/include/uapi/drm/qaic_accel.h @@ -64,7 +64,7 @@ struct qaic_manage_trans_hdr { /** * struct qaic_manage_trans_passthrough - Defines a passthrough transaction. * @hdr: In. Header to identify this transaction. - * @data: In. Payload of this ransaction. Opaque to the driver. Userspace must + * @data: In. Payload of this transaction. Opaque to the driver. Userspace must * encode in little endian and align/pad to 64-bit. 
*/ struct qaic_manage_trans_passthrough { diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index dce1835eced4..dbbc404d2b3d 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -42,6 +42,8 @@ extern "C" { #define DRM_V3D_PERFMON_DESTROY 0x09 #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b +#define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -58,6 +60,10 @@ extern "C" { #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ struct drm_v3d_perfmon_get_values) #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) +#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ + struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -286,6 +292,8 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_PERFMON, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, + DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, }; struct drm_v3d_get_param { @@ -599,6 +607,16 @@ struct drm_v3d_submit_cpu { __u64 extensions; }; +/* The performance counters index represented by this enum are deprecated and + * must no longer be used. These counters are only valid for V3D 4.2. + * + * In order to check for performance counter information, + * use DRM_IOCTL_V3D_PERFMON_GET_COUNTER. + * + * Don't use V3D_PERFCNT_NUM to retrieve the maximum number of performance + * counters. You should use DRM_IOCTL_V3D_GET_PARAM with the following + * parameter: DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ enum { V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, V3D_PERFCNT_FEP_VALID_PRIMS, @@ -717,6 +735,55 @@ struct drm_v3d_perfmon_get_values { __u64 values_ptr; }; +#define DRM_V3D_PERFCNT_MAX_NAME 64 +#define DRM_V3D_PERFCNT_MAX_CATEGORY 32 +#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256 + +/** + * struct drm_v3d_perfmon_get_counter - ioctl to get the description of a + * performance counter + * + * As userspace needs to retrieve information about the performance counters + * available, this IOCTL allows users to get information about a performance + * counter (name, category and description). + */ +struct drm_v3d_perfmon_get_counter { + /* + * Counter ID + * + * Must be smaller than the maximum number of performance counters, which + * can be retrieve through DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ + __u8 counter; + + /* Name of the counter */ + __u8 name[DRM_V3D_PERFCNT_MAX_NAME]; + + /* Category of the counter */ + __u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY]; + + /* Description of the counter */ + __u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION]; + + /* mbz */ + __u8 reserved[7]; +}; + +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 + +/** + * struct drm_v3d_perfmon_set_global - ioctl to define a global performance + * monitor + * + * The global performance monitor will be used for all jobs. If a global + * performance monitor is defined, jobs with a self-defined performance + * monitor won't be allowed. 
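+ *
+ * A minimal sketch of describing a counter and installing/clearing a global
+ * performance monitor (illustrative only; 'perfmon_id' is assumed to be a
+ * perfmon previously created with DRM_IOCTL_V3D_PERFMON_CREATE):
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_v3d_perfmon_get_counter counter = { .counter = 0 };
+ *	struct drm_v3d_perfmon_set_global set_global = { 0 };
+ *
+ *	// Describe counter 0; valid IDs are below the value returned for
+ *	// DRM_V3D_PARAM_MAX_PERF_COUNTERS.
+ *	ioctl(fd, DRM_IOCTL_V3D_PERFMON_GET_COUNTER, &counter);
+ *
+ *	// Install the global performance monitor...
+ *	set_global.id = perfmon_id;
+ *	ioctl(fd, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, &set_global);
+ *
+ *	// ...and remove it again later.
+ *	set_global.flags = DRM_V3D_PERFMON_CLEAR_GLOBAL;
+ *	ioctl(fd, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, &set_global);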
+ */ +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 538a3ac95c54..616916985e3f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_OBSERVATION 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -389,6 +393,10 @@ struct drm_xe_query_mem_regions { * * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device * has usable VRAM + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - Flag is set if the device + * has low latency hint support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the + * device has CPU address mirroring support * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -405,6 +413,8 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define DRM_XE_QUERY_CONFIG_FLAGS 1 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -459,8 +469,16 @@ struct drm_xe_gt { * by struct drm_xe_query_mem_regions' mem_class. */ __u64 far_mem_regions; + /** @ip_ver_major: Graphics/media IP major version on GMD_ID platforms */ + __u16 ip_ver_major; + /** @ip_ver_minor: Graphics/media IP minor version on GMD_ID platforms */ + __u16 ip_ver_minor; + /** @ip_ver_rev: Graphics/media IP revision version on GMD_ID platforms */ + __u16 ip_ver_rev; + /** @pad2: MBZ */ + __u16 pad2; /** @reserved: Reserved */ - __u64 reserved[8]; + __u64 reserved[7]; }; /** @@ -500,19 +518,31 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type + * may be omitted if the driver is unable to query the mask from the + * hardware. * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` - * means each DSS has 16 EU. + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. 
+ * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. */ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ __u16 gt_id; -#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0) -#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1) -#define DRM_XE_TOPO_EU_PER_DSS (1 << 2) +#define DRM_XE_TOPO_DSS_GEOMETRY 1 +#define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 +#define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 /** @type: type of mask */ __u16 type; @@ -583,6 +613,7 @@ struct drm_xe_query_engine_cycles { struct drm_xe_query_uc_fw_version { /** @uc_type: The micro-controller type to query firmware version */ #define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0 +#define XE_QUERY_UC_TYPE_HUC 1 __u16 uc_type; /** @pad: MBZ */ @@ -605,6 +636,39 @@ struct drm_xe_query_uc_fw_version { }; /** + * struct drm_xe_query_pxp_status - query if PXP is ready + * + * If PXP is enabled and no fatal error has occurred, the status will be set to + * one of the following values: + * 0: PXP init still in progress + * 1: PXP init complete + * + * If PXP is not enabled or something has gone wrong, the query will be failed + * with one of the following error codes: + * -ENODEV: PXP not supported or disabled; + * -EIO: fatal error occurred during init, so PXP will never be enabled; + * -EINVAL: incorrect value provided as part of the query; + * -EFAULT: error copying the memory between kernel and userspace. + * + * The status can only be 0 in the first few seconds after driver load. If + * everything works as expected, the status will transition to init complete in + * less than 1 second, while in case of errors the driver might take longer to + * start returning an error code, but it should still take less than 10 seconds. + * + * The supported session type bitmask is based on the values in + * enum drm_xe_pxp_session_type. TYPE_NONE is always supported and therefore + * is not reported in the bitmask. + * + */ +struct drm_xe_query_pxp_status { + /** @status: current PXP status */ + __u32 status; + + /** @supported_session_types: bitmask of supported PXP session types */ + __u32 supported_session_types; +}; + +/** * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main * structure to query device information * @@ -623,6 +687,7 @@ struct drm_xe_query_uc_fw_version { * attributes. * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS * * If size is set to 0, the driver fills it with the required size for * the requested type of data to query. If size is equal to the required @@ -674,6 +739,9 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9 +#define DRM_XE_DEVICE_QUERY_EU_STALL 10 /** @query: The type of data to query */ __u32 query; @@ -717,8 +785,23 @@ struct drm_xe_device_query { * - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This * is uncached. Scanout surfaces should likely use this. All objects * that can be placed in VRAM must use this. 
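+ *
+ * A minimal VRAM-placed allocation could look like the sketch below
+ * (illustrative only; 'vram_instance' is assumed to be the instance of a
+ * DRM_XE_MEM_REGION_CLASS_VRAM region reported by
+ * DRM_XE_DEVICE_QUERY_MEM_REGIONS):
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_gem_create create = {
+ *		.size = 1 * 1024 * 1024,
+ *		.placement = 1 << vram_instance,
+ *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
+ *	};
+ *
+ *	ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
+ *	// create.handle now holds the GEM handle.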
+ * + * This ioctl supports setting the following properties via the + * %DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this object will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. Objects used with session of type + * %DRM_XE_PXP_TYPE_HWDRM will be marked as invalid if a PXP invalidation + * event occurs after their creation. Attempting to flip an invalid object + * will cause a black frame to be displayed instead. Submissions with invalid + * objects mapped in the VM will be rejected. */ struct drm_xe_gem_create { +#define DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -767,7 +850,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ @@ -779,6 +868,32 @@ struct drm_xe_gem_create { /** * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * + * The @flags can be: + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset + * for use in mmap ioctl. Writing to the returned mmap address will generate a + * PCI memory barrier with low overhead (avoiding IOCTL call as well as writing + * to VRAM which would also add overhead), acting like an MI_MEM_FENCE + * instruction. + * + * Note: The mmap size can be at most 4K, due to HW limitations. As a result + * this interface is only supported on CPU architectures that support 4K page + * size. The mmap_offset ioctl will detect this and gracefully return an + * error, where userspace is expected to have a different fallback method for + * triggering a barrier. + * + * Roughly the usage would be as follows: + * + * .. code-block:: C + * + * struct drm_xe_gem_mmap_offset mmo = { + * .handle = 0, // must be set to 0 + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, + * }; + * + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset); + * map[i] = 0xdeadbeaf; // issue barrier */ struct drm_xe_gem_mmap_offset { /** @extensions: Pointer to the first extension struct, if any */ @@ -787,7 +902,8 @@ struct drm_xe_gem_mmap_offset { /** @handle: Handle for the object being mapped. 
*/ __u32 handle; - /** @flags: Must be zero */ +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0) + /** @flags: Flags */ __u32 flags; /** @offset: The fake offset to use for subsequent mmap call */ @@ -862,12 +978,27 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: + * - %DRM_XE_VM_BIND_FLAG_READONLY - Setup the page tables as read-only + * to ensure write protection + * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - On a faulting VM, do the + * MAP operation immediately rather than deferring the MAP to the page + * fault handler. This is implied on a non-faulting VM as there is no + * fault handler to defer to. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. This flag is intended to * implement VK sparse bindings. + * - %DRM_XE_VM_BIND_FLAG_CHECK_PXP - If the object is encrypted via PXP, + * reject the binding if the encryption key is no longer valid. This + * flag has no effect on BOs that are not marked as using PXP. + * - %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR - When the CPU address mirror flag is + * set, no mappings are created rather the range is reserved for CPU address + * mirroring which will be populated on GPU page faults or prefetches. Only + * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address + * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO + * handle MBZ, and the BO offset MBZ. */ struct drm_xe_vm_bind_op { /** @extensions: Pointer to the first extension struct, if any */ @@ -920,7 +1051,9 @@ struct drm_xe_vm_bind_op { * on the @pat_index. For such mappings there is no actual memory being * mapped (the address in the PTE is invalid), so the various PAT memory * attributes likely do not apply. Simply leaving as zero is one - * option (still a valid pat_index). + * option (still a valid pat_index). Same applies to + * DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR bindings as for such mapping + * there is no actual memory being mapped. */ __u16 pat_index; @@ -936,6 +1069,14 @@ struct drm_xe_vm_bind_op { /** @userptr: user pointer to bind on */ __u64 userptr; + + /** + * @cpu_addr_mirror_offset: Offset from GPU @addr to create + * CPU address mirror mappings. MBZ with current level of + * support (e.g. 1 to 1 mapping between GPU and CPU mappings + * only supported). + */ + __s64 cpu_addr_mirror_offset; }; /** @@ -954,8 +1095,12 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) +#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) +#define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) /** @flags: Bind flags */ __u32 flags; @@ -1047,6 +1192,24 @@ struct drm_xe_vm_bind { /** * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE * + * This ioctl supports setting the following properties via the + * %DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY - set the queue priority. + * CAP_SYS_NICE is required to set a value above normal. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE - set the queue timeslice + * duration in microseconds. 
+ * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this queue will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. When a queue of type + * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session + * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * Given that going into a power-saving state kills PXP HWDRM sessions, + * runtime PM will be blocked while queues of this type are alive. + * All PXP queues will be killed if a PXP invalidation event occurs. + * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class * &DRM_XE_ENGINE_CLASS_RENDER. @@ -1065,12 +1228,27 @@ struct drm_xe_vm_bind { * }; * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); * + * Allow users to provide a hint to kernel for cases demanding low latency + * profile. Please note it will have impact on power consumption. User can + * indicate low latency hint with flag while creating exec queue as + * mentioned below, + * + * struct drm_xe_exec_queue_create exec_queue_create = { + * .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT, + * .extensions = 0, + * .vm_id = vm, + * .num_bb_per_exec = 1, + * .num_eng_per_bb = 1, + * .instances = to_user_pointer(&instance), + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); + * */ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 - +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -1083,7 +1261,8 @@ struct drm_xe_exec_queue_create { /** @vm_id: VM to use for this exec queue */ __u32 vm_id; - /** @flags: MBZ */ +#define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (1 << 0) + /** @flags: flags to use for this exec queue */ __u32 flags; /** @exec_queue_id: Returned exec queue ID */ @@ -1351,6 +1530,437 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_observation_type - Observation stream types + */ +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, + /** @DRM_XE_OBSERVATION_TYPE_EU_STALL: EU stall sampling observation stream type */ + DRM_XE_OBSERVATION_TYPE_EU_STALL, +}; + +/** + * enum drm_xe_observation_op - Observation stream ops + */ +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, + + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, + + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION + * + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). 
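+ *
+ * For example, opening an OA stream means setting @observation_type to
+ * %DRM_XE_OBSERVATION_TYPE_OA, @observation_op to
+ * %DRM_XE_OBSERVATION_OP_STREAM_OPEN and pointing @param at a chain of
+ * struct drm_xe_ext_set_property entries. A minimal sketch (illustrative
+ * only; 'oa_unit_id' and 'metric_set_id' are placeholders, and a real open
+ * would usually chain more properties, e.g. the OA format):
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_ext_set_property props[2] = {};
+ *
+ *	props[0].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
+ *	props[0].base.next_extension = (__u64)(uintptr_t)&props[1];
+ *	props[0].property = DRM_XE_OA_PROPERTY_OA_UNIT_ID;
+ *	props[0].value = oa_unit_id;
+ *
+ *	props[1].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
+ *	props[1].property = DRM_XE_OA_PROPERTY_OA_METRIC_SET;
+ *	props[1].value = metric_set_id;
+ *
+ *	struct drm_xe_observation_param param = {
+ *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
+ *		.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
+ *		.param = (__u64)(uintptr_t)props,
+ *	};
+ *
+ *	// On success, the ioctl returns the observation stream fd.
+ *	int stream_fd = ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &param);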
+ */ +struct drm_xe_observation_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's + * + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific + */ +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), +}; + +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) +#define DRM_XE_OA_CAPS_SYNCS (1 << 1) +#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) +#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. 
Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_observation_param points to the first + * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. 
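+ *
+ * For instance (illustrative only, with fmt_type, counter_sel, counter_size
+ * and bc_report taken from the PRM/Bspec for the format of interest), a
+ * format value can be composed as:
+ *
+ *	format = (__u64)fmt_type | (__u64)counter_sel << 8 |
+ *		 (__u64)counter_size << 16 | (__u64)bc_report << 24;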
+ */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. OA queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, + + /** + * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array + * specified in @DRM_XE_OA_PROPERTY_SYNCS + */ + DRM_XE_OA_PROPERTY_NUM_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array + * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA + * configuration will wait till input fences signal. Output fences + * will signal after the new OA configuration takes effect. For + * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar + * to the VM bind case. + */ + DRM_XE_OA_PROPERTY_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be + * allocated by the driver in bytes. Supported sizes are powers of + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA + * buffer is allocated by default. + */ + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, + + /** + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait + * for before unblocking poll or read + */ + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, +}; + +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). 
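+ *
+ * A typical read loop, sketched below (illustrative only; 'stream_fd', 'buf'
+ * and 'buf_size' are placeholders), queries the status whenever read()
+ * fails with EIO:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_oa_stream_status status = {};
+ *	ssize_t n = read(stream_fd, buf, buf_size);
+ *
+ *	if (n < 0 && errno == EIO) {
+ *		ioctl(stream_fd, DRM_XE_OBSERVATION_IOCTL_STATUS, &status);
+ *		if (status.oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW) {
+ *			// Reports were dropped; read more often or use a
+ *			// larger OA buffer (DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE).
+ *		}
+ *	}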
+ */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + +/** + * enum drm_xe_pxp_session_type - Supported PXP session types. + * + * We currently only support HWDRM sessions, which are used for protected + * content that ends up being displayed, but the HW supports multiple types, so + * we might extend support in the future. + */ +enum drm_xe_pxp_session_type { + /** @DRM_XE_PXP_TYPE_NONE: PXP not used */ + DRM_XE_PXP_TYPE_NONE = 0, + /** + * @DRM_XE_PXP_TYPE_HWDRM: HWDRM sessions are used for content that ends + * up on the display. + */ + DRM_XE_PXP_TYPE_HWDRM = 1, +}; + +/* ID of the protected content session managed by Xe when PXP is active */ +#define DRM_XE_PXP_HWDRM_DEFAULT_SESSION 0xf + +/** + * enum drm_xe_eu_stall_property_id - EU stall sampling input property ids. + * + * These properties are passed to the driver at open as a chain of + * @drm_xe_ext_set_property structures with @property set to these + * properties' enums and @value set to the corresponding values of these + * properties. @drm_xe_user_extension base.name should be set to + * @DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY. + * + * With the file descriptor obtained from open, user space must enable + * the EU stall stream fd with @DRM_XE_OBSERVATION_IOCTL_ENABLE before + * calling read(). EIO errno from read() indicates HW dropped data + * due to full buffer. + */ +enum drm_xe_eu_stall_property_id { +#define DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_EU_STALL_PROP_GT_ID: @gt_id of the GT on which + * EU stall data will be captured. + */ + DRM_XE_EU_STALL_PROP_GT_ID = 1, + + /** + * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in + * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall + */ + DRM_XE_EU_STALL_PROP_SAMPLE_RATE, + + /** + * @DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS: Minimum number of + * EU stall data reports to be present in the kernel buffer + * before unblocking a blocked poll or read. + */ + DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, +}; + +/** + * struct drm_xe_query_eu_stall - Information about EU stall sampling. + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses + * struct @drm_xe_query_eu_stall in .data. 
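+ *
+ * As with the other query types, the query is usually issued twice: once to
+ * learn the required size and once to fetch the data (illustrative sketch,
+ * error handling omitted):
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_device_query query = {
+ *		.query = DRM_XE_DEVICE_QUERY_EU_STALL,
+ *	};
+ *	struct drm_xe_query_eu_stall *info;
+ *
+ *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); // fills query.size
+ *	info = malloc(query.size);
+ *	query.data = (__u64)(uintptr_t)info;
+ *	ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *	// info->sampling_rates[0] is the fastest supported rate.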
+ */
+struct drm_xe_query_eu_stall {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @capabilities: EU stall capabilities bit-mask */
+ __u64 capabilities;
+#define DRM_XE_EU_STALL_CAPS_BASE (1 << 0)
+
+ /** @record_size: size of each EU stall data record */
+ __u64 record_size;
+
+ /** @per_xecore_buf_size: internal per XeCore buffer size */
+ __u64 per_xecore_buf_size;
+
+ /** @reserved: Reserved */
+ __u64 reserved[5];
+
+ /** @num_sampling_rates: Number of sampling rates in @sampling_rates array */
+ __u64 num_sampling_rates;
+
+ /**
+ * @sampling_rates: Flexible array of sampling rates
+ * sorted in the fastest to slowest order.
+ * Sampling rates are specified in GPU clock cycles.
+ */
+ __u64 sampling_rates[];
+};
+
 #if defined(__cplusplus)
 }
 #endif