From 03b1230ca12a12e045d83b0357792075bf94a1e0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 18:50:25 -0700 Subject: io_uring: ensure async punted sendmsg/recvmsg requests copy data Just like commit f67676d160c6 for read/write requests, this one ensures that the msghdr data is fully copied if we need to punt a recvmsg or sendmsg system call to async context. Signed-off-by: Jens Axboe --- include/linux/socket.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 4bde63021c09..903507fb901f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -378,12 +378,19 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); -extern long __sys_sendmsg_sock(struct socket *sock, - struct user_msghdr __user *msg, +extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, unsigned int flags); -extern long __sys_recvmsg_sock(struct socket *sock, - struct user_msghdr __user *msg, +extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, + struct user_msghdr __user *umsg, + struct sockaddr __user *uaddr, unsigned int flags); +extern int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov); +extern int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, -- cgit From f499a021ea8c9f70321fce3d674d8eca5bbeee2c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 16:28:46 -0700 Subject: io_uring: ensure async punted connect requests copy data Just like commit f67676d160c6 for read/write requests, this one ensures that the sockaddr data has been copied for IORING_OP_CONNECT if we need to punt the request to async context. Signed-off-by: Jens Axboe --- include/linux/socket.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 903507fb901f..2d2313403101 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -406,9 +406,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); -extern int __sys_connect_file(struct file *file, - struct sockaddr __user *uservaddr, int addrlen, - int file_flags); +extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, + int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); -- cgit From 795ee49c1a28d1b3eeb2b463f18d557700fc6153 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 30 Nov 2019 23:23:52 +0300 Subject: block: optimise bvec_iter_advance() bvec_iter_advance() is quite popular, but compilers fail to do proper alias analysis and optimise it good enough. The assembly is checked for gcc 9.2, x86-64. - remove @iter->bi_size from min(...), as it's always less than @bytes. Modify at the beginning and forget about it. - the compiler isn't able to collapse memory dependencies and remove writes in the loop. Help it by explicitely using local vars. Signed-off-by: Arvind Sankar Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/bvec.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a032f01e928c..679a42253170 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -87,26 +87,24 @@ struct bvec_iter_all { static inline bool bvec_iter_advance(const struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) { + unsigned int idx = iter->bi_idx; + if (WARN_ONCE(bytes > iter->bi_size, "Attempted to advance past end of bvec iter\n")) { iter->bi_size = 0; return false; } - while (bytes) { - const struct bio_vec *cur = bv + iter->bi_idx; - unsigned len = min3(bytes, iter->bi_size, - cur->bv_len - iter->bi_bvec_done); - - bytes -= len; - iter->bi_size -= len; - iter->bi_bvec_done += len; + iter->bi_size -= bytes; + bytes += iter->bi_bvec_done; - if (iter->bi_bvec_done == cur->bv_len) { - iter->bi_bvec_done = 0; - iter->bi_idx++; - } + while (bytes && bytes >= bv[idx].bv_len) { + bytes -= bv[idx].bv_len; + idx++; } + + iter->bi_idx = idx; + iter->bi_bvec_done = bytes; return true; } -- cgit From 9b38bb4b1e6de47b379afaad2c707df639bb4dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:04 +0100 Subject: block: simplify blkdev_nr_zones Simplify the arguments to blkdev_nr_zones by passing a gendisk instead of the block_device and capacity. This also removes the need for __blkdev_nr_zones as all callers are outside the fast path and can deal with the additional branch. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6012e2592628..c5852de402b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - -extern unsigned int blkdev_nr_zones(struct block_device *bdev); +unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -371,7 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct block_device *bdev) +static inline unsigned int blkdev_nr_zones(struct gendisk *disk) { return 0; } -- cgit From f216fdd77b5654f8c4f6fac6020d6aabc58878ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:05 +0100 Subject: block: replace seq_zones_bitmap with conv_zones_bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invert the meaning of seq_zones_bitmap by keeping a bitmap of conventional zones. This allows not having a bitmap for devices that do not have conventional zones. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5852de402b6..503c4d4c5884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -503,9 +503,9 @@ struct request_queue { /* * Zoned block device information for request dispatch control. * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit clear) or - * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones * bits which indicates if a zone is write locked, that is, if a write * request targeting the zone was dispatched. All three fields are * initialized by the low level device driver (e.g. scsi/sd.c). @@ -518,7 +518,7 @@ struct request_queue { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned long *seq_zones_bitmap; + unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -723,9 +723,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, static inline bool blk_queue_zone_is_seq(struct request_queue *q, sector_t sector) { - if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) + if (!blk_queue_is_zoned(q)) return false; - return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); + if (!q->conv_zones_bitmap) + return true; + return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -- cgit From ae58954d8734c44298f55ed71e683ea944994fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:07 +0100 Subject: block: don't handle bio based drivers in blk_revalidate_disk_zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bio based drivers only need to update q->nr_zones. Do that manually instead of overloading blk_revalidate_disk_zones to keep that function simpler for the next round of changes that will rely even more on the request based functionality. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 503c4d4c5884..47eb22a3b7f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,11 +375,6 @@ static inline unsigned int blkdev_nr_zones(struct gendisk *disk) return 0; } -static inline int blk_revalidate_disk_zones(struct gendisk *disk) -{ - return 0; -} - static inline int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -- cgit