diff options
Diffstat (limited to 'include/uapi/drm/amdgpu_drm.h')
| -rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 817 |
1 files changed, 799 insertions, 18 deletions
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 7b8fa11c2285..f80aa4c9d88f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -52,6 +52,12 @@ extern "C" { #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 +#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -67,13 +73,57 @@ extern "C" { #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) +#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles) +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + * + * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). + */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 #define AMDGPU_GEM_DOMAIN_VRAM 0x4 #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL | \ + AMDGPU_GEM_DOMAIN_MMIO_REMAP) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) @@ -83,10 +133,58 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) -/* Flag that create shadow bo(GTT) while allocating vram bo */ -#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. + */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -152,6 +250,9 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_ALLOC_CTX 1 #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -162,13 +263,46 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* indicate gpu reset occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) + +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* + * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires + * CAP_SYS_NICE or DRM_MASTER +*/ +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; - /** For future use, no flags defined so far */ + /** Flags */ __u32 flags; __u32 ctx_id; - __u32 _pad; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; }; union drm_amdgpu_ctx_out { @@ -185,6 +319,11 @@ union drm_amdgpu_ctx_out { /** Reset status since the last call of the ioctl. */ __u32 reset_status; } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; }; union drm_amdgpu_ctx { @@ -192,6 +331,260 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 @@ -212,6 +605,23 @@ union drm_amdgpu_vm { struct drm_amdgpu_vm_out out; }; +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + /* * This is not a reliable API and you should expect it to fail for any * number of reasons and have fallback path that do not use userptr to @@ -250,9 +660,37 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 -/* GFX9 and later: */ +/* GFX9 - GFX11: */ #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ @@ -368,6 +806,21 @@ union drm_amdgpu_wait_fences { #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 #define AMDGPU_GEM_OP_SET_PLACEMENT 1 +#define AMDGPU_GEM_OP_GET_MAPPING_INFO 2 + +struct drm_amdgpu_gem_vm_entry { + /* Start of mapping (in bytes) */ + __u64 addr; + + /* Size of mapping (in bytes) */ + __u64 size; + + /* Mapping offset */ + __u64 offset; + + /* flags needed to recreate mapping */ + __u64 flags; +}; /* Sets or returns a value associated with a buffer. */ struct drm_amdgpu_gem_op { @@ -375,8 +828,44 @@ struct drm_amdgpu_gem_op { __u32 handle; /** AMDGPU_GEM_OP_* */ __u32 op; - /** Input or return value */ + /** Input or return value. For MAPPING_INFO op: pointer to array of struct drm_amdgpu_gem_vm_entry */ __u64 value; + /** For MAPPING_INFO op: number of mappings (in/out) */ + __u32 num_entries; + + __u32 padding; +}; + +#define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT (1 << 0) + +struct drm_amdgpu_gem_list_handles { + /* User pointer to array of drm_amdgpu_gem_bo_info_entry */ + __u64 entries; + + /* Size of entries buffer / Number of handles in process (if larger than size of buffer, must retry) */ + __u32 num_entries; + + __u32 padding; +}; + +struct drm_amdgpu_gem_list_handles_entry { + /* gem handle of buffer object */ + __u32 gem_handle; + + /* Currently just one flag: IS_IMPORT */ + __u32 flags; + + /* Size of bo */ + __u64 size; + + /* Preferred domains for GEM_CREATE */ + __u64 preferred_domains; + + /* GEM_CREATE flags for re-creation of buffer */ + __u64 alloc_flags; + + /* physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; }; #define AMDGPU_VA_OP_MAP 1 @@ -400,14 +889,18 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_MTYPE_MASK (0xf << 5) /* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ #define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) -/* Use NC MTYPE instead of default MTYPE */ +/* Use Non Coherent MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_NC (1 << 5) -/* Use WC MTYPE instead of default MTYPE */ +/* Use Write Combine MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_WC (2 << 5) -/* Use CC MTYPE instead of default MTYPE */ +/* Use Cache Coherent MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_CC (3 << 5) -/* Use UC MTYPE instead of default MTYPE */ +/* Use UnCached MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) struct drm_amdgpu_gem_va { /** GEM object handle */ @@ -423,6 +916,19 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 @@ -432,8 +938,14 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ #define AMDGPU_HW_IP_VCN_ENC 7 -#define AMDGPU_HW_IP_NUM 8 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 @@ -442,6 +954,11 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a struct drm_amdgpu_cs_chunk { __u32 chunk_id; @@ -455,7 +972,7 @@ struct drm_amdgpu_cs_in { /** Handle of resource list associated with CS */ __u32 bo_list_handle; __u32 num_chunks; - __u32 _pad; + __u32 flags; /** this points to __u64 * which point to cs chunks */ __u64 chunks; }; @@ -480,6 +997,23 @@ union drm_amdgpu_cs { /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ #define AMDGPU_IB_FLAG_PREEMPT (1<<2) +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ @@ -513,6 +1047,27 @@ struct drm_amdgpu_cs_chunk_sem { __u32 handle; }; +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + struct drm_amdgpu_cs_chunk_data { union { struct drm_amdgpu_cs_chunk_ib ib_data; @@ -520,12 +1075,34 @@ struct drm_amdgpu_cs_chunk_data { }; }; -/** +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU * */ -#define AMDGPU_IDS_FLAGS_FUSION 0x1 -#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 +#define AMDGPU_IDS_FLAGS_FUSION 0x01 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x02 +#define AMDGPU_IDS_FLAGS_TMZ 0x04 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x08 +#define AMDGPU_IDS_FLAGS_GANG_SUBMIT 0x10 + +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 @@ -563,6 +1140,36 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_SOS 0x0c /* Subquery id: Query PSP ASD firmware version */ #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 + /* Subquery id: Query DMCUB firmware version */ + #define AMDGPU_INFO_FW_DMCUB 0x14 + /* Subquery id: Query TOC firmware version */ + #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c + /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ @@ -591,6 +1198,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_VBIOS_SIZE 0x1 /* Subquery id: Query vbios image */ #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 /* Query UVD handles */ #define AMDGPU_INFO_NUM_HANDLES 0x1C /* Query sensor related information */ @@ -609,8 +1218,61 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_VDDNB 0x6 /* Subquery id: Query graphics voltage */ #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 + /* Subquery id: Query GPU stable pstate shader clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 + /* Subquery id: Query GPU stable pstate memory clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb + /* Subquery id: Query input GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -678,6 +1340,10 @@ struct drm_amdgpu_info { struct { __u32 type; } sensor_info; + + struct { + __u32 type; + } video_cap; }; }; @@ -738,6 +1404,15 @@ struct drm_amdgpu_info_firmware { __u32 feature; }; +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + #define AMDGPU_VRAM_TYPE_UNKNOWN 0 #define AMDGPU_VRAM_TYPE_GDDR1 1 #define AMDGPU_VRAM_TYPE_DDR2 2 @@ -746,6 +1421,12 @@ struct drm_amdgpu_info_firmware { #define AMDGPU_VRAM_TYPE_GDDR5 5 #define AMDGPU_VRAM_TYPE_HBM 6 #define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define AMDGPU_VRAM_TYPE_HBM3E 13 struct drm_amdgpu_info_device { /** PCI Device ID */ @@ -771,7 +1452,8 @@ struct drm_amdgpu_info_device { __u32 enabled_rb_pipes_mask; __u32 num_rb_pipes; __u32 num_hw_gfx_contexts; - __u32 _pad; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; __u64 ids_flags; /** Starting virtual address for UMDs. */ __u64 virtual_address_offset; @@ -818,9 +1500,41 @@ struct drm_amdgpu_info_device { __u32 gs_prim_buffer_depth; /* max gs wavefront per vgt*/ __u32 max_gs_waves_per_vgt; - __u32 _pad1; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; /* always on cu bitmap */ __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; }; struct drm_amdgpu_info_hw_ip { @@ -835,7 +1549,10 @@ struct drm_amdgpu_info_hw_ip { __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. */ __u32 available_rings; - __u32 _pad; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; + /* Userq available slots */ + __u32 userq_num_slots; }; struct drm_amdgpu_info_num_handles { @@ -863,6 +1580,61 @@ struct drm_amdgpu_info_vce_clock_table { __u32 pad; }; +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + /* * Supported GPU families */ @@ -874,6 +1646,15 @@ struct drm_amdgpu_info_vce_clock_table { #define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ #define AMDGPU_FAMILY_AI 141 /* Vega10 */ #define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ #if defined(__cplusplus) } |
