From 62fe99cef94a5900cac3bf15fd03ee8baad1a99c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 19 May 2023 14:50:29 +0800 Subject: ublk: add read()/write() support for ublk char device Support pread()/pwrite() on ublk char device for reading/writing request io buffer, so data copy between io request buffer and userspace buffer can be moved to ublk server from ublk driver. Then UBLK_F_NEED_GET_DATA becomes not necessary, so ublk server can allocate buffer without one extra round uring command communication for userspace to provide buffer. IO buffer can be located by iocb->ki_pos which encodes buffer offset, io tag and queue id info, and type of iocb->ki_pos is u64, so it is big enough for holding reasonable queue depth, nr_queues and max io buffer size. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230519065030.351216-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 640bf687b94a..c0c1632c671e 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -93,9 +93,29 @@ #define UBLKSRV_CMD_BUF_OFFSET 0 #define UBLKSRV_IO_BUF_OFFSET 0x80000000 -/* tag bit is 12bit, so at most 4096 IOs for each queue */ +/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */ #define UBLK_MAX_QUEUE_DEPTH 4096 +/* single IO buffer max size is 32MB */ +#define UBLK_IO_BUF_OFF 0 +#define UBLK_IO_BUF_BITS 25 +#define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1) + +/* so at most 64K IOs for each queue */ +#define UBLK_TAG_OFF UBLK_IO_BUF_BITS +#define UBLK_TAG_BITS 16 +#define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1) + +/* max 4096 queues */ +#define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS) +#define UBLK_QID_BITS 12 +#define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1) + +#define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS) + +#define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) +#define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) + /* * zero copy requires 4k block size, and can remap ublk driver's io * request into ublksrv's vm space -- cgit From 1172d5b8beca6b899deb9f7f2850e7e47ec16198 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 19 May 2023 14:50:30 +0800 Subject: ublk: support user copy Currently copy between io request buffer(pages) and userspace buffer is done inside ublk_map_io() or ublk_unmap_io(). This way performs very well in case of pre-allocated userspace io buffer. For dynamically allocated or external userspace backend io buffer, UBLK_F_NEED_GET_DATA is added for ublk server to provide buffer by one extra command communication for WRITE request. For READ, userspace simply provides buffer, but can't know when the buffer is done[1]. Add UBLK_F_USER_COPY by moving io data copy out of kernel by providing read()/write() on /dev/ublkcN, and simply let ublk server do the io data copy. This way makes both side cleaner, the cost is that one extra syscall for copy io data between request and backend buffer. With UBLK_F_USER_COPY, it actually becomes possible to run per-io zero copy now, such as, only do zero copy for big size IO, so it can be thought as one prep patch for supporting zero copy. Meantime zero copy still needs to expose read()/write() buffer for some corner case, such as passthrough IO. [1] READ buffer in UBLK_F_NEED_GET_DATA https://lore.kernel.org/linux-block/116d8a56-0881-56d3-9bcc-78ff3e1dc4e5@linux.alibaba.com/T/#m23bd4b8634c0a054e6797063167b469949a247bb ublksrv loop usercopy code: https://github.com/ming1/ubdsrv/commits/usercopy Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230519065030.351216-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c0c1632c671e..54b5b0aeefca 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -165,6 +165,9 @@ /* use ioctl encoding for uring command */ #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) +/* Copy between request and user buffer by pread()/pwrite() */ +#define UBLK_F_USER_COPY (1UL << 7) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit From b5bbc52fd01278642773818642288999a0236cb6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 3 Jun 2023 12:06:01 +0800 Subject: ublk: add control command of UBLK_U_CMD_GET_FEATURES Add control command of UBLK_U_CMD_GET_FEATURES for returning driver's feature set or capability. This way can simplify userspace for maintaining compatibility because userspace doesn't need to send command to one device for querying driver feature set any more. Such as, with the queried feature set, userspace can choose to use: - UBLK_CMD_GET_DEV_INFO2 or UBLK_CMD_GET_DEV_INFO, - UBLK_U_CMD_* or UBLK_CMD_* Userspace code: https://github.com/ming1/ubdsrv/commits/features-cmd Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230603040601.775227-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 54b5b0aeefca..4b8558db90e1 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -47,6 +47,14 @@ _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_GET_DEV_INFO2 \ _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_FEATURES \ + _IOR('u', 0x13, struct ublksrv_ctrl_cmd) + +/* + * 64bits are enough now, and it should be easy to extend in case of + * running out of feature flags + */ +#define UBLK_FEATURES_LEN 8 /* * IO commands, issued by ublk server, and handled by ublk driver. -- cgit From 3a41db531e5124adaa3a9ab9ca0c724aee85b10c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Mar 2023 18:45:41 +0200 Subject: pktcdvd: Get rid of custom printing macros We may use traditional dev_*() macros instead of custom ones provided by the driver. Signed-off-by: Andy Shevchenko Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230310164549.22133-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jens Axboe --- include/uapi/linux/pktcdvd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h index 6a5552dfd6af..987a3022dc5f 100644 --- a/include/uapi/linux/pktcdvd.h +++ b/include/uapi/linux/pktcdvd.h @@ -16,6 +16,7 @@ #include /* + * UNUSED: * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. */ #define PACKET_DEBUG 1 -- cgit From 95a55437dc49fb3342c82e61f5472a71c63d9ed0 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 21 Jun 2023 08:17:24 +1200 Subject: block: change all __u32 annotations to __be32 in affs_hardblocks.h The Amiga partition parser module uses signed int for partition sector address and count, which will overflow for disks larger than 1 TB. Use u64 as type for sector address and size to allow using disks up to 2 TB without LBD support, and disks larger than 2 TB with LBD. The RBD format allows to specify disk sizes up to 2^128 bytes (though native OS limitations reduce this somewhat, to max 2^68 bytes), so check for u64 overflow carefully to protect against overflowing sector_t. This bug was reported originally in 2012, and the fix was created by the RDB author, Joanne Dow . A patch had been discussed and reviewed on linux-m68k at that time but never officially submitted (now resubmitted as patch 1 of this series). Patch 3 (this series) adds additional error checking and warning messages. One of the error checks now makes use of the previously unused rdb_CylBlocks field, which causes a 'sparse' warning (cast to restricted __be32). Annotate all 32 bit fields in affs_hardblocks.h as __be32, as the on-disk format of RDB and partition blocks is always big endian. Reported-by: Martin Steigerwald Closes: https://bugzilla.kernel.org/show_bug.cgi?id=43511 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Message-ID: <201206192146.09327.Martin@lichtvoll.de> Cc: # 5.2 Signed-off-by: Michael Schmitz Reviewed-by: Christoph Hellwig Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230620201725.7020-3-schmitzmic@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/affs_hardblocks.h | 68 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/affs_hardblocks.h b/include/uapi/linux/affs_hardblocks.h index 5e2fb8481252..a5aff2eb5f70 100644 --- a/include/uapi/linux/affs_hardblocks.h +++ b/include/uapi/linux/affs_hardblocks.h @@ -7,42 +7,42 @@ /* Just the needed definitions for the RDB of an Amiga HD. */ struct RigidDiskBlock { - __u32 rdb_ID; + __be32 rdb_ID; __be32 rdb_SummedLongs; - __s32 rdb_ChkSum; - __u32 rdb_HostID; + __be32 rdb_ChkSum; + __be32 rdb_HostID; __be32 rdb_BlockBytes; - __u32 rdb_Flags; - __u32 rdb_BadBlockList; + __be32 rdb_Flags; + __be32 rdb_BadBlockList; __be32 rdb_PartitionList; - __u32 rdb_FileSysHeaderList; - __u32 rdb_DriveInit; - __u32 rdb_Reserved1[6]; - __u32 rdb_Cylinders; - __u32 rdb_Sectors; - __u32 rdb_Heads; - __u32 rdb_Interleave; - __u32 rdb_Park; - __u32 rdb_Reserved2[3]; - __u32 rdb_WritePreComp; - __u32 rdb_ReducedWrite; - __u32 rdb_StepRate; - __u32 rdb_Reserved3[5]; - __u32 rdb_RDBBlocksLo; - __u32 rdb_RDBBlocksHi; - __u32 rdb_LoCylinder; - __u32 rdb_HiCylinder; - __u32 rdb_CylBlocks; - __u32 rdb_AutoParkSeconds; - __u32 rdb_HighRDSKBlock; - __u32 rdb_Reserved4; + __be32 rdb_FileSysHeaderList; + __be32 rdb_DriveInit; + __be32 rdb_Reserved1[6]; + __be32 rdb_Cylinders; + __be32 rdb_Sectors; + __be32 rdb_Heads; + __be32 rdb_Interleave; + __be32 rdb_Park; + __be32 rdb_Reserved2[3]; + __be32 rdb_WritePreComp; + __be32 rdb_ReducedWrite; + __be32 rdb_StepRate; + __be32 rdb_Reserved3[5]; + __be32 rdb_RDBBlocksLo; + __be32 rdb_RDBBlocksHi; + __be32 rdb_LoCylinder; + __be32 rdb_HiCylinder; + __be32 rdb_CylBlocks; + __be32 rdb_AutoParkSeconds; + __be32 rdb_HighRDSKBlock; + __be32 rdb_Reserved4; char rdb_DiskVendor[8]; char rdb_DiskProduct[16]; char rdb_DiskRevision[4]; char rdb_ControllerVendor[8]; char rdb_ControllerProduct[16]; char rdb_ControllerRevision[4]; - __u32 rdb_Reserved5[10]; + __be32 rdb_Reserved5[10]; }; #define IDNAME_RIGIDDISK 0x5244534B /* "RDSK" */ @@ -50,16 +50,16 @@ struct RigidDiskBlock { struct PartitionBlock { __be32 pb_ID; __be32 pb_SummedLongs; - __s32 pb_ChkSum; - __u32 pb_HostID; + __be32 pb_ChkSum; + __be32 pb_HostID; __be32 pb_Next; - __u32 pb_Flags; - __u32 pb_Reserved1[2]; - __u32 pb_DevFlags; + __be32 pb_Flags; + __be32 pb_Reserved1[2]; + __be32 pb_DevFlags; __u8 pb_DriveName[32]; - __u32 pb_Reserved2[15]; + __be32 pb_Reserved2[15]; __be32 pb_Environment[17]; - __u32 pb_EReserved[15]; + __be32 pb_EReserved[15]; }; #define IDNAME_PARTITION 0x50415254 /* "PART" */ -- cgit