diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 17:44:52 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 17:44:52 -0700 |
commit | 0c86b42439b6c11d758b3392a21117934fef00c1 (patch) | |
tree | 6aa7071476067661867af33767cc0e1863397a04 /include/uapi | |
parent | 51aad189f8e0f926c0977d180cae6a78df445f27 (diff) | |
parent | cf05922d63e2ae6a9b1b52ff5236a44c3b29f78c (diff) |
Merge tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
"Outside of drm there are some rust patches from Danilo who maintains
that area in here, and some pieces for drm header check tests.
The major things in here are a new driver supporting the touchbar
displays on M1/M2, the nova-core stub driver which is just the vehicle
for adding rust abstractions and start developing a real driver inside
of.
xe adds support for SVM with a non-driver specific SVM core
abstraction that will hopefully be useful for other drivers, along
with support for shrinking for TTM devices. I'm sure xe and AMD
support new devices, but the pipeline depth on these things is hard to
know what they end up being in the marketplace!
uapi:
- add mediatek tiled fourcc
- add support for notifying userspace on device wedged
new driver:
- appletbdrm: support for Apple Touchbar displays on m1/m2
- nova-core: skeleton rust driver to develop nova inside off
firmware:
- add some rust firmware pieces
rust:
- add 'LocalModule' type alias
component:
- add helper to query bound status
fbdev:
- fbtft: remove access to page->index
media:
- cec: tda998x: import driver from drm
dma-buf:
- add fast path for single fence merging
tests:
- fix lockdep warnings
atomic:
- allow full modeset on connector changes
- clarify semantics of allow_modeset and drm_atomic_helper_check
- async-flip: support on arbitary planes
- writeback: fix UAF
- Document atomic-state history
format-helper:
- support ARGB8888 to ARGB4444 conversions
buddy:
- fix multi-root cleanup
ci:
- update IGT
dp:
- support extended wake timeout
- mst: fix RAD to string conversion
- increase DPCD eDP control CAP size to 5 bytes
- add DPCD eDP v1.5 definition
- add helpers for LTTPR transparent mode
panic:
- encode QR code according to Fido 2.2
scheduler:
- add parameter struct for init
- improve job peek/pop operations
- optimise drm_sched_job struct layout
ttm:
- refactor pool allocation
- add helpers for TTM shrinker
panel-orientation:
- add a bunch of new quirks
panel:
- convert panels to multi-style functions
- edp: Add support for B140UAN04.4, BOE NV140FHM-NZ, CSW MNB601LS1-3,
LG LP079QX1-SP0V, MNE007QS3-7, STA 116QHD024002, Starry
116KHD024006, Lenovo T14s Gen6 Snapdragon
- himax-hx83102: Add support for CSOT PNA957QT1-1, Kingdisplay
kd110n11-51ie, Starry 2082109qfh040022-50e
- visionox-r66451: use multi-style MIPI-DSI functions
- raydium-rm67200: Add driver for Raydium RM67200
- simple: Add support for BOE AV123Z7M-N17, BOE AV123Z7M-N17
- sony-td4353-jdi: Use MIPI-DSI multi-func interface
- summit: Add driver for Apple Summit display panel
- visionox-rm692e5: Add driver for Visionox RM692E5
bridge:
- pass full atomic state to various callbacks
- adv7511: Report correct capabilities
- it6505: Fix HDCP V compare
- snd65dsi86: fix device IDs
- nwl-dsi: set bridge type
- ti-sn65si83: add error recovery and set bridge type
- synopsys: add HDMI audio support
xe:
- support device-wedged event
- add mmap support for PCI memory barrier
- perf pmu integration and expose per-engien activity
- add EU stall sampling support
- GPU SVM and Xe SVM implementation
- use TTM shrinker
- add survivability mode to allow the driver to do firmware updates
in critical failure states
- PXP HWDRM support for MTL and LNL
- expose package/vram temps over hwmon
- enable DP tunneling
- drop mmio_ext abstraction
- Reject BO evcition if BO is bound to current VM
- Xe suballocator improvements
- re-use display vmas when possible
- add GuC Buffer Cache abstraction
- PCI ID update for Panther Lake and Battlemage
- Enable SRIOV for Panther Lake
- Refactor VRAM manager location
i915:
- enable extends wake timeout
- support device-wedged event
- Enable DP 128b/132b SST DSC
- FBC dirty rectangle support for display version 30+
- convert i915/xe to drm client setup
- Compute HDMI PLLS for rates not in fixed tables
- Allow DSB usage when PSR is enabled on LNL+
- Enable panel replay without full modeset
- Enable async flips with compressed buffers on ICL+
- support luminance based brightness via DPCD for eDP
- enable VRR enable/disable without full modeset
- allow GuC SLPC default strategies on MTL+ for performance
- lots of display refactoring in move to struct intel_display
amdgpu:
- add device wedged event
- support async page flips on overlay planes
- enable broadcast RGB drm property
- add info ioctl for virt mode
- OEM i2c support for RGB lights
- GC 11.5.2 + 11.5.3 support
- SDMA 6.1.3 support
- NBIO 7.9.1 + 7.11.2 support
- MMHUB 1.8.1 + 3.3.2 support
- DCN 3.6.0 support
- Add dynamic workload profile switching for GC 10-12
- support larger VBIOS sizes
- Mark gttsize parameters as deprecated
- Initial JPEG queue resset support
amdkfd:
- add KFD per process flags for setting precision
- sync pasid values between KGD and KFD
- improve GTT/VRAM handling for APUs
- fix user queue validation on GC7/8
- SDMA queue reset support
raedeon:
- rs400 hyperz fix
i2c:
- td998x: drop platform_data, split driver into media and bridge
ast:
- transmitter chip detection refactoring
- vbios display mode refactoring
- astdp: fix connection status and filter unsupported modes
- cursor handling refactoring
imagination:
- check job dependencies with sched helper
ivpu:
- improve command queue handling
- use workqueue for IRQ handling
- add support HW fault injection
- locking fixes
mgag200:
- add support for G200eH5
msm:
- dpu: add concurrent writeback support for DPU 10.x+
- use LTTPR helpers
- GPU:
- Fix obscure GMU suspend failure
- Expose syncobj timeline support
- Extend GPU devcoredump with pagetable info
- a623 support
- Fix a6xx gen1/gen2 indexed-register blocks in gpu snapshot /
devcoredump
- Display:
- Add cpu-cfg interconnect paths on SM8560 and SM8650
- Introduce KMS OMMU fault handler, causing devcoredump snapshot
- Fixed error pointer dereference in msm_kms_init_aspace()
- DPU:
- Fix mode_changing handling
- Add writeback support on SM6150 (QCS615)
- Fix DSC programming in 1:1:1 topology
- Reworked hardware resource allocation, moving it to the CRTC code
- Enabled support for Concurrent WriteBack (CWB) on SM8650
- Enabled CDM blocks on all relevant platforms
- Reworked debugfs interface for BW/clocks debugging
- Clear perf params before calculating bw
- Support YUV formats on writeback
- Fixed double inclusion
- Fixed writeback in YUV formats when using cloned output, Dropped
wb2_formats_rgb
- Corrected dpu_crtc_check_mode_changed and struct dpu_encoder_virt
kerneldocs
- Fixed uninitialized variable in dpu_crtc_kickoff_clone_mode()
- DSI:
- DSC-related fixes
- Rework clock programming
- DSI PHY:
- Fix 7nm (and lower) PHY programming
- Add proper DT schema definitions for DSI PHY clocks
- HDMI:
- Rework the driver, enabling the use of the HDMI Connector
framework
- Bindings:
- Added eDP PHY on SA8775P
nouveau:
- move drm_slave_encoder interface into driver
- nvkm: refactor GSP RPC
- use LTTPR helpers
mediatek:
- HDMI fixup and refinement
- add MT8188 dsc compatible
- MT8365 SoC support
panthor:
- Expose sizes of intenral BOs via fdinfo
- Fix race between reset and suspend
- Improve locking
qaic:
- Add support for AIC200
renesas:
- Fix limits in DT bindings
rockchip:
- support rk3562-mali
- rk3576: Add HDMI support
- vop2: Add new display modes on RK3588 HDMI0 up to 4K
- Don't change HDMI reference clock rate
- Fix DT bindings
- analogix_dp: add eDP support
- fix shutodnw
solomon:
- Set SPI device table to silence warnings
- Fix pixel and scanline encoding
v3d:
- handle clock
vc4:
- Use drm_exec
- Use dma-resv for wait-BO ioctl
- Remove seqno infrastructure
virtgpu:
- Support partial mappings of GEM objects
- Reserve VGA resources during initialization
- Fix UAF in virtgpu_dma_buf_free_obj()
- Add panic support
vkms:
- Switch to a managed modesetting pipeline
- Add support for ARGB8888
- fix UAf
xlnx:
- Set correct DMA segment size
- use mutex guards
- Fix error handling
- Fix docs"
* tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel: (1762 commits)
drm/amd/pm: Update feature list for smu_v13_0_6
drm/amdgpu: Add parameter documentation for amdgpu_sync_fence
drm/amdgpu/discovery: optionally use fw based ip discovery
drm/amdgpu/discovery: use specific ip_discovery.bin for legacy asics
drm/amdgpu/discovery: check ip_discovery fw file available
drm/amd/pm: Remove unnecessay UQ10 to UINT conversion
drm/amd/pm: Remove unnecessay UQ10 to UINT conversion
drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA
drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush
drm/amd/amdgpu: Increase max rings to enable SDMA page ring
drm/amdgpu: Decode deferred error type in gfx aca bank parser
drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5 GPUs
drm/amdgpu/mes: clean up SDMA HQD loop
drm/amdgpu/mes: enable compute pipes across all MEC
drm/amdgpu/mes: drop MES 10.x leftovers
drm/amdgpu/mes: optimize compute loop handling
drm/amdgpu/sdma: guilty tracking is per instance
drm/amdgpu/sdma: fix engine reset handling
drm/amdgpu: remove invalid usage of sched.ready
drm/amdgpu: add cleaner shader trace point
...
Diffstat (limited to 'include/uapi')
-rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 10 | ||||
-rw-r--r-- | include/uapi/drm/drm_fourcc.h | 41 | ||||
-rw-r--r-- | include/uapi/drm/ivpu_accel.h | 84 | ||||
-rw-r--r-- | include/uapi/drm/panthor_drm.h | 86 | ||||
-rw-r--r-- | include/uapi/drm/xe_drm.h | 240 | ||||
-rw-r--r-- | include/uapi/linux/kfd_ioctl.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/kfd_sysfs.h | 3 |
7 files changed, 426 insertions, 48 deletions
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index aaa4f3bc688b..25d5c6e90a99 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -763,6 +763,16 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_IDS_FLAGS_TMZ 0x4 #define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 + /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 /* get the crtc_id from the mode object id? */ diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 70f3b00b0681..e41a3cec6a9e 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -421,6 +421,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ARM 0x08 #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a +#define DRM_FORMAT_MOD_VENDOR_MTK 0x0b /* add more to the end as needed */ @@ -1453,6 +1454,46 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) */ #define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0) +/* MediaTek modifiers + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * 7: 0 TILE LAYOUT Values are MTK_FMT_MOD_TILE_* + * 15: 8 COMPRESSION Values are MTK_FMT_MOD_COMPRESS_* + * 23:16 10 BIT LAYOUT Values are MTK_FMT_MOD_10BIT_LAYOUT_* + * + */ + +#define DRM_FORMAT_MOD_MTK(__flags) fourcc_mod_code(MTK, __flags) + +/* + * MediaTek Tiled Modifier + * The lowest 8 bits of the modifier is used to specify the tiling + * layout. Only the 16L_32S tiling is used for now, but we define an + * "untiled" version and leave room for future expansion. + */ +#define MTK_FMT_MOD_TILE_MASK 0xf +#define MTK_FMT_MOD_TILE_NONE 0x0 +#define MTK_FMT_MOD_TILE_16L32S 0x1 + +/* + * Bits 8-15 specify compression options + */ +#define MTK_FMT_MOD_COMPRESS_MASK (0xf << 8) +#define MTK_FMT_MOD_COMPRESS_NONE (0x0 << 8) +#define MTK_FMT_MOD_COMPRESS_V1 (0x1 << 8) + +/* + * Bits 16-23 specify how the bits of 10 bit formats are + * stored out in memory + */ +#define MTK_FMT_MOD_10BIT_LAYOUT_MASK (0xf << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_PACKED (0x0 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBTILED (0x1 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBRASTER (0x2 << 16) + +/* alias for the most common tiling format */ +#define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) + /* * AMD modifiers * diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index a35b97b097bf..746c43bd3eb6 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -22,6 +22,9 @@ extern "C" { #define DRM_IVPU_METRIC_STREAMER_STOP 0x08 #define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09 #define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a +#define DRM_IVPU_CMDQ_CREATE 0x0b +#define DRM_IVPU_CMDQ_DESTROY 0x0c +#define DRM_IVPU_CMDQ_SUBMIT 0x0d #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -57,6 +60,15 @@ extern "C" { DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, \ struct drm_ivpu_metric_streamer_get_data) +#define DRM_IOCTL_IVPU_CMDQ_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_CREATE, struct drm_ivpu_cmdq_create) + +#define DRM_IOCTL_IVPU_CMDQ_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_DESTROY, struct drm_ivpu_cmdq_destroy) + +#define DRM_IOCTL_IVPU_CMDQ_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_SUBMIT, struct drm_ivpu_cmdq_submit) + /** * DOC: contexts * @@ -107,6 +119,13 @@ extern "C" { * accessible by hardware DMA. */ #define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 +/** + * DRM_IVPU_CAP_MANAGE_CMDQ + * + * Driver supports explicit command queue operations like command queue create, + * command queue destroy and submit job on specific command queue. + */ +#define DRM_IVPU_CAP_MANAGE_CMDQ 3 /** * struct drm_ivpu_param - Get/Set VPU parameters @@ -316,6 +335,44 @@ struct drm_ivpu_submit { __u32 priority; }; +/** + * struct drm_ivpu_cmdq_submit - Submit commands to the VPU using explicit command queue + * + * Execute a single command buffer on a given command queue. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_cmdq_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this job. + * The number of elements in the array must be equal to the value given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain all + * BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** @cmdq_id: ID for the command queue where job will be submitted */ + __u32 cmdq_id; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + /* drm_ivpu_bo_wait job status codes */ #define DRM_IVPU_JOB_STATUS_SUCCESS 0 #define DRM_IVPU_JOB_STATUS_ABORTED 256 @@ -389,6 +446,33 @@ struct drm_ivpu_metric_streamer_get_data { }; /** + * struct drm_ivpu_cmdq_create - Create command queue for job submission + */ +struct drm_ivpu_cmdq_create { + /** @cmdq_id: Returned ID of created command queue */ + __u32 cmdq_id; + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the following: + * %DRM_IVPU_JOB_PRIORITY_DEFAULT + * %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; +}; + +/** + * struct drm_ivpu_cmdq_destroy - Destroy a command queue + */ +struct drm_ivpu_cmdq_destroy { + /** @cmdq_id: ID of command queue to destroy */ + __u32 cmdq_id; +}; + +/** * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data */ struct drm_ivpu_metric_streamer_stop { diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index b99763cbae48..97e2c4510e69 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -130,48 +130,6 @@ enum drm_panthor_ioctl_id { }; /** - * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number - * @__access: Access type. Must be R, W or RW. - * @__id: One of the DRM_PANTHOR_xxx id. - * @__type: Suffix of the type being passed to the IOCTL. - * - * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx - * values instead. - * - * Return: An IOCTL number to be passed to ioctl() from userspace. - */ -#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ - DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ - struct drm_panthor_ ## __type) - -#define DRM_IOCTL_PANTHOR_DEV_QUERY \ - DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) -#define DRM_IOCTL_PANTHOR_VM_CREATE \ - DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) -#define DRM_IOCTL_PANTHOR_VM_DESTROY \ - DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) -#define DRM_IOCTL_PANTHOR_VM_BIND \ - DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) -#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ - DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) -#define DRM_IOCTL_PANTHOR_BO_CREATE \ - DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) -#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ - DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) -#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ - DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) -#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ - DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) -#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ - DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) -#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ - DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) -#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ - DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) -#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ - DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) - -/** * DOC: IOCTL arguments */ @@ -1019,6 +977,50 @@ struct drm_panthor_tiler_heap_destroy { __u32 pad; }; +/** + * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_PANTHOR_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. + */ +#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ + DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ + struct drm_panthor_ ## __type) + +enum { + DRM_IOCTL_PANTHOR_DEV_QUERY = + DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query), + DRM_IOCTL_PANTHOR_VM_CREATE = + DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create), + DRM_IOCTL_PANTHOR_VM_DESTROY = + DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy), + DRM_IOCTL_PANTHOR_VM_BIND = + DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind), + DRM_IOCTL_PANTHOR_VM_GET_STATE = + DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state), + DRM_IOCTL_PANTHOR_BO_CREATE = + DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create), + DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET = + DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset), + DRM_IOCTL_PANTHOR_GROUP_CREATE = + DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create), + DRM_IOCTL_PANTHOR_GROUP_DESTROY = + DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy), + DRM_IOCTL_PANTHOR_GROUP_SUBMIT = + DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit), + DRM_IOCTL_PANTHOR_GROUP_GET_STATE = + DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state), + DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create), + DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f62689ca861a..616916985e3f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -393,6 +393,10 @@ struct drm_xe_query_mem_regions { * * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device * has usable VRAM + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - Flag is set if the device + * has low latency hint support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the + * device has CPU address mirroring support * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -409,6 +413,8 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define DRM_XE_QUERY_CONFIG_FLAGS 1 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -630,6 +636,39 @@ struct drm_xe_query_uc_fw_version { }; /** + * struct drm_xe_query_pxp_status - query if PXP is ready + * + * If PXP is enabled and no fatal error has occurred, the status will be set to + * one of the following values: + * 0: PXP init still in progress + * 1: PXP init complete + * + * If PXP is not enabled or something has gone wrong, the query will be failed + * with one of the following error codes: + * -ENODEV: PXP not supported or disabled; + * -EIO: fatal error occurred during init, so PXP will never be enabled; + * -EINVAL: incorrect value provided as part of the query; + * -EFAULT: error copying the memory between kernel and userspace. + * + * The status can only be 0 in the first few seconds after driver load. If + * everything works as expected, the status will transition to init complete in + * less than 1 second, while in case of errors the driver might take longer to + * start returning an error code, but it should still take less than 10 seconds. + * + * The supported session type bitmask is based on the values in + * enum drm_xe_pxp_session_type. TYPE_NONE is always supported and therefore + * is not reported in the bitmask. + * + */ +struct drm_xe_query_pxp_status { + /** @status: current PXP status */ + __u32 status; + + /** @supported_session_types: bitmask of supported PXP session types */ + __u32 supported_session_types; +}; + +/** * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main * structure to query device information * @@ -648,6 +687,7 @@ struct drm_xe_query_uc_fw_version { * attributes. * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS * * If size is set to 0, the driver fills it with the required size for * the requested type of data to query. If size is equal to the required @@ -700,6 +740,8 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 #define DRM_XE_DEVICE_QUERY_OA_UNITS 8 +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9 +#define DRM_XE_DEVICE_QUERY_EU_STALL 10 /** @query: The type of data to query */ __u32 query; @@ -743,8 +785,23 @@ struct drm_xe_device_query { * - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This * is uncached. Scanout surfaces should likely use this. All objects * that can be placed in VRAM must use this. + * + * This ioctl supports setting the following properties via the + * %DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this object will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. Objects used with session of type + * %DRM_XE_PXP_TYPE_HWDRM will be marked as invalid if a PXP invalidation + * event occurs after their creation. Attempting to flip an invalid object + * will cause a black frame to be displayed instead. Submissions with invalid + * objects mapped in the VM will be rejected. */ struct drm_xe_gem_create { +#define DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -811,6 +868,32 @@ struct drm_xe_gem_create { /** * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * + * The @flags can be: + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset + * for use in mmap ioctl. Writing to the returned mmap address will generate a + * PCI memory barrier with low overhead (avoiding IOCTL call as well as writing + * to VRAM which would also add overhead), acting like an MI_MEM_FENCE + * instruction. + * + * Note: The mmap size can be at most 4K, due to HW limitations. As a result + * this interface is only supported on CPU architectures that support 4K page + * size. The mmap_offset ioctl will detect this and gracefully return an + * error, where userspace is expected to have a different fallback method for + * triggering a barrier. + * + * Roughly the usage would be as follows: + * + * .. code-block:: C + * + * struct drm_xe_gem_mmap_offset mmo = { + * .handle = 0, // must be set to 0 + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, + * }; + * + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset); + * map[i] = 0xdeadbeaf; // issue barrier */ struct drm_xe_gem_mmap_offset { /** @extensions: Pointer to the first extension struct, if any */ @@ -819,7 +902,8 @@ struct drm_xe_gem_mmap_offset { /** @handle: Handle for the object being mapped. */ __u32 handle; - /** @flags: Must be zero */ +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0) + /** @flags: Flags */ __u32 flags; /** @offset: The fake offset to use for subsequent mmap call */ @@ -906,6 +990,15 @@ struct drm_xe_vm_destroy { * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. This flag is intended to * implement VK sparse bindings. + * - %DRM_XE_VM_BIND_FLAG_CHECK_PXP - If the object is encrypted via PXP, + * reject the binding if the encryption key is no longer valid. This + * flag has no effect on BOs that are not marked as using PXP. + * - %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR - When the CPU address mirror flag is + * set, no mappings are created rather the range is reserved for CPU address + * mirroring which will be populated on GPU page faults or prefetches. Only + * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address + * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO + * handle MBZ, and the BO offset MBZ. */ struct drm_xe_vm_bind_op { /** @extensions: Pointer to the first extension struct, if any */ @@ -958,7 +1051,9 @@ struct drm_xe_vm_bind_op { * on the @pat_index. For such mappings there is no actual memory being * mapped (the address in the PTE is invalid), so the various PAT memory * attributes likely do not apply. Simply leaving as zero is one - * option (still a valid pat_index). + * option (still a valid pat_index). Same applies to + * DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR bindings as for such mapping + * there is no actual memory being mapped. */ __u16 pat_index; @@ -974,6 +1069,14 @@ struct drm_xe_vm_bind_op { /** @userptr: user pointer to bind on */ __u64 userptr; + + /** + * @cpu_addr_mirror_offset: Offset from GPU @addr to create + * CPU address mirror mappings. MBZ with current level of + * support (e.g. 1 to 1 mapping between GPU and CPU mappings + * only supported). + */ + __s64 cpu_addr_mirror_offset; }; /** @@ -996,6 +1099,8 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) +#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) +#define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) /** @flags: Bind flags */ __u32 flags; @@ -1087,6 +1192,24 @@ struct drm_xe_vm_bind { /** * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE * + * This ioctl supports setting the following properties via the + * %DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY - set the queue priority. + * CAP_SYS_NICE is required to set a value above normal. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE - set the queue timeslice + * duration in microseconds. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this queue will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. When a queue of type + * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session + * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * Given that going into a power-saving state kills PXP HWDRM sessions, + * runtime PM will be blocked while queues of this type are alive. + * All PXP queues will be killed if a PXP invalidation event occurs. + * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class * &DRM_XE_ENGINE_CLASS_RENDER. @@ -1105,12 +1228,27 @@ struct drm_xe_vm_bind { * }; * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); * + * Allow users to provide a hint to kernel for cases demanding low latency + * profile. Please note it will have impact on power consumption. User can + * indicate low latency hint with flag while creating exec queue as + * mentioned below, + * + * struct drm_xe_exec_queue_create exec_queue_create = { + * .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT, + * .extensions = 0, + * .vm_id = vm, + * .num_bb_per_exec = 1, + * .num_eng_per_bb = 1, + * .instances = to_user_pointer(&instance), + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); + * */ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 - +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -1123,7 +1261,8 @@ struct drm_xe_exec_queue_create { /** @vm_id: VM to use for this exec queue */ __u32 vm_id; - /** @flags: MBZ */ +#define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (1 << 0) + /** @flags: flags to use for this exec queue */ __u32 flags; /** @exec_queue_id: Returned exec queue ID */ @@ -1397,6 +1536,8 @@ struct drm_xe_wait_user_fence { enum drm_xe_observation_type { /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ DRM_XE_OBSERVATION_TYPE_OA, + /** @DRM_XE_OBSERVATION_TYPE_EU_STALL: EU stall sampling observation stream type */ + DRM_XE_OBSERVATION_TYPE_EU_STALL, }; /** @@ -1729,6 +1870,97 @@ struct drm_xe_oa_stream_info { __u64 reserved[3]; }; +/** + * enum drm_xe_pxp_session_type - Supported PXP session types. + * + * We currently only support HWDRM sessions, which are used for protected + * content that ends up being displayed, but the HW supports multiple types, so + * we might extend support in the future. + */ +enum drm_xe_pxp_session_type { + /** @DRM_XE_PXP_TYPE_NONE: PXP not used */ + DRM_XE_PXP_TYPE_NONE = 0, + /** + * @DRM_XE_PXP_TYPE_HWDRM: HWDRM sessions are used for content that ends + * up on the display. + */ + DRM_XE_PXP_TYPE_HWDRM = 1, +}; + +/* ID of the protected content session managed by Xe when PXP is active */ +#define DRM_XE_PXP_HWDRM_DEFAULT_SESSION 0xf + +/** + * enum drm_xe_eu_stall_property_id - EU stall sampling input property ids. + * + * These properties are passed to the driver at open as a chain of + * @drm_xe_ext_set_property structures with @property set to these + * properties' enums and @value set to the corresponding values of these + * properties. @drm_xe_user_extension base.name should be set to + * @DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY. + * + * With the file descriptor obtained from open, user space must enable + * the EU stall stream fd with @DRM_XE_OBSERVATION_IOCTL_ENABLE before + * calling read(). EIO errno from read() indicates HW dropped data + * due to full buffer. + */ +enum drm_xe_eu_stall_property_id { +#define DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_EU_STALL_PROP_GT_ID: @gt_id of the GT on which + * EU stall data will be captured. + */ + DRM_XE_EU_STALL_PROP_GT_ID = 1, + + /** + * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in + * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall + */ + DRM_XE_EU_STALL_PROP_SAMPLE_RATE, + + /** + * @DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS: Minimum number of + * EU stall data reports to be present in the kernel buffer + * before unblocking a blocked poll or read. + */ + DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, +}; + +/** + * struct drm_xe_query_eu_stall - Information about EU stall sampling. + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses + * struct @drm_xe_query_eu_stall in .data. + */ +struct drm_xe_query_eu_stall { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @capabilities: EU stall capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_EU_STALL_CAPS_BASE (1 << 0) + + /** @record_size: size of each EU stall data record */ + __u64 record_size; + + /** @per_xecore_buf_size: internal per XeCore buffer size */ + __u64 per_xecore_buf_size; + + /** @reserved: Reserved */ + __u64 reserved[5]; + + /** @num_sampling_rates: Number of sampling rates in @sampling_rates array */ + __u64 num_sampling_rates; + + /** + * @sampling_rates: Flexible array of sampling rates + * sorted in the fastest to slowest order. + * Sampling rates are specified in GPU clock cycles. + */ + __u64 sampling_rates[]; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index fa9f9846b88e..1e59344c5673 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -43,9 +43,10 @@ * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag * - 1.17 - Add SDMA queue creation with target SDMA engine ID + * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 17 +#define KFD_IOCTL_MINOR_VERSION 18 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -62,6 +63,8 @@ struct kfd_ioctl_get_version_args { #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 +#define KFD_MIN_QUEUE_RING_SIZE 1024 + struct kfd_ioctl_create_queue_args { __u64 ring_base_address; /* to KFD */ __u64 write_pointer_address; /* from KFD */ @@ -148,6 +151,9 @@ struct kfd_dbg_device_info_entry { #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 +/* Misc. per process flags */ +#define KFD_PROC_FLAG_MFMA_HIGH_PRECISION (1 << 0) + struct kfd_ioctl_set_memory_policy_args { __u64 alternate_aperture_base; /* to KFD */ __u64 alternate_aperture_size; /* to KFD */ @@ -155,7 +161,7 @@ struct kfd_ioctl_set_memory_policy_args { __u32 gpu_id; /* to KFD */ __u32 default_policy; /* to KFD */ __u32 alternate_policy; /* to KFD */ - __u32 pad; + __u32 misc_process_flag; /* to KFD */ }; /* diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 859b8e91d4d3..1125fe47959f 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -63,6 +63,9 @@ #define HSA_CAP_PER_QUEUE_RESET_SUPPORTED 0x80000000 #define HSA_CAP_RESERVED 0x000f8000 +#define HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED 0x00000001 +#define HSA_CAP2_RESERVED 0xfffffffe + /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_SHIFT 0 |