diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 10 | ||||
-rw-r--r-- | drivers/block/Makefile | 4 | ||||
-rw-r--r-- | drivers/block/amiflop.c | 10 | ||||
-rw-r--r-- | drivers/block/aoe/aoeblk.c | 4 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoemain.c | 2 | ||||
-rw-r--r-- | drivers/block/brd.c | 75 | ||||
-rw-r--r-- | drivers/block/floppy.c | 59 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 6 | ||||
-rw-r--r-- | drivers/block/nbd.c | 10 | ||||
-rw-r--r-- | drivers/block/null_blk/main.c | 2 | ||||
-rw-r--r-- | drivers/block/rbd.c | 2 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 6 | ||||
-rw-r--r-- | drivers/block/rnull.rs | 80 | ||||
-rw-r--r-- | drivers/block/rnull/Kconfig | 13 | ||||
-rw-r--r-- | drivers/block/rnull/Makefile | 3 | ||||
-rw-r--r-- | drivers/block/rnull/configfs.rs | 262 | ||||
-rw-r--r-- | drivers/block/rnull/rnull.rs | 104 | ||||
-rw-r--r-- | drivers/block/sunvdc.c | 7 | ||||
-rw-r--r-- | drivers/block/swim.c | 4 | ||||
-rw-r--r-- | drivers/block/ublk_drv.c | 242 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 8 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 4 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c | 25 |
24 files changed, 629 insertions, 315 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index df38fb364904..77d694448990 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -17,6 +17,7 @@ menuconfig BLK_DEV if BLK_DEV source "drivers/block/null_blk/Kconfig" +source "drivers/block/rnull/Kconfig" config BLK_DEV_FD tristate "Normal floppy disk support" @@ -311,15 +312,6 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. -config BLK_DEV_RUST_NULL - tristate "Rust null block driver (Experimental)" - depends on RUST - help - This is the Rust implementation of the null block driver. For now it - is only a minimal stub. - - If unsure, say N. - config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a695ce74ef22..2d8096eb8cdf 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -9,9 +9,6 @@ # needed for trace events ccflags-y += -I$(src) -obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o -rnull_mod-y := rnull.o - obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o @@ -38,6 +35,7 @@ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ +obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull/ obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o obj-$(CONFIG_BLK_DEV_ZONED_LOOP) += zloop.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 6357d86eafdc..2932b6653b6f 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1523,13 +1523,13 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = MINOR(bdev->bd_dev) & 3; + struct amiga_floppy_struct *p = disk->private_data; - geo->heads = unit[drive].type->heads; - geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; - geo->cylinders = unit[drive].type->tracks; + geo->heads = p->type->heads; + geo->sectors = p->dtype->sects * p->type->sect_mult; + geo->cylinders = p->type->tracks; return 0; } diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00b74a845328..34ead75e7e02 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -269,9 +269,9 @@ static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, } static int -aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +aoeblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct aoedev *d = bdev->bd_disk->private_data; + struct aoedev *d = disk->private_data; if ((d->flags & DEVFL_UP) == 0) { printk(KERN_ERR "aoe: disk not up\n"); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6298f8e271e3..a9affb7c264d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1761,6 +1761,6 @@ aoecmd_exit(void) kfree(kts); kfree(ktiowq); - free_page((unsigned long) page_address(empty_page)); + __free_page(empty_page); empty_page = NULL; } diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index cdf6e4041bb9..3b21750038ee 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -44,7 +44,7 @@ aoe_init(void) { int ret; - aoe_wq = alloc_workqueue("aoe_wq", 0, 0); + aoe_wq = alloc_workqueue("aoe_wq", WQ_PERCPU, 0); if (!aoe_wq) return -ENOMEM; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 0c2eabe14af3..9778259b30d4 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -44,45 +44,74 @@ struct brd_device { }; /* - * Look up and return a brd's page for a given sector. + * Look up and return a brd's page with reference grabbed for a given sector. */ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { - return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); + struct page *page; + XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); + + rcu_read_lock(); +repeat: + page = xas_load(&xas); + if (xas_retry(&xas, page)) { + xas_reset(&xas); + goto repeat; + } + + if (!page) + goto out; + + if (!get_page_unless_zero(page)) { + xas_reset(&xas); + goto repeat; + } + + if (unlikely(page != xas_reload(&xas))) { + put_page(page); + xas_reset(&xas); + goto repeat; + } +out: + rcu_read_unlock(); + + return page; } /* * Insert a new page for a given sector, if one does not already exist. + * The returned page will grab reference. */ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, blk_opf_t opf) - __releases(rcu) - __acquires(rcu) { gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO; struct page *page, *ret; - rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - if (!page) { - rcu_read_lock(); + if (!page) return ERR_PTR(-ENOMEM); - } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); - rcu_read_lock(); - if (ret) { + if (!ret) { + brd->brd_nr_pages++; + get_page(page); + xa_unlock(&brd->brd_pages); + return page; + } + + if (!xa_is_err(ret)) { + get_page(ret); xa_unlock(&brd->brd_pages); - __free_page(page); - if (xa_is_err(ret)) - return ERR_PTR(xa_err(ret)); + put_page(page); return ret; } - brd->brd_nr_pages++; + xa_unlock(&brd->brd_pages); - return page; + put_page(page); + return ERR_PTR(xa_err(ret)); } /* @@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd) pgoff_t idx; xa_for_each(&brd->brd_pages, idx, page) { - __free_page(page); + put_page(page); cond_resched(); } @@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - rcu_read_lock(); page = brd_lookup_page(brd, sector); if (!page && op_is_write(opf)) { page = brd_insert_page(brd, sector, opf); @@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) memset(kaddr, 0, bv.bv_len); } kunmap_local(kaddr); - rcu_read_unlock(); bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); + if (page) + put_page(page); return true; out_error: - rcu_read_unlock(); if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) bio_wouldblock_error(bio); else @@ -149,13 +177,6 @@ out_error: return false; } -static void brd_free_one_page(struct rcu_head *head) -{ - struct page *page = container_of(head, struct page, rcu_head); - - __free_page(page); -} - static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { sector_t aligned_sector = round_up(sector, PAGE_SECTORS); @@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { - call_rcu(&page->rcu_head, brd_free_one_page); + put_page(page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 24be0c2c4075..5336c3c5ca36 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -163,35 +163,35 @@ /* do print messages for unexpected interrupts */ static int print_unex = 1; -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/workqueue.h> -#include <linux/fdreg.h> -#include <linux/fd.h> -#include <linux/hdreg.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/mm.h> +#include <linux/async.h> #include <linux/bio.h> -#include <linux/string.h> -#include <linux/jiffies.h> -#include <linux/fcntl.h> +#include <linux/compat.h> #include <linux/delay.h> -#include <linux/mc146818rtc.h> /* CMOS defines */ -#include <linux/ioport.h> -#include <linux/interrupt.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/fd.h> +#include <linux/fdreg.h> +#include <linux/fs.h> +#include <linux/hdreg.h> #include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> #include <linux/major.h> -#include <linux/platform_device.h> +#include <linux/mc146818rtc.h> /* CMOS defines */ +#include <linux/mm.h> #include <linux/mod_devicetable.h> +#include <linux/module.h> #include <linux/mutex.h> -#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/timer.h> #include <linux/uaccess.h> -#include <linux/async.h> -#include <linux/compat.h> +#include <linux/workqueue.h> /* * PS/2 floppies have much slower step rates than regular floppies. @@ -233,8 +233,6 @@ static unsigned short virtual_dma_port = 0x3f0; irqreturn_t floppy_interrupt(int irq, void *dev_id); static int set_dor(int fdc, char mask, char data); -#define K_64 0x10000 /* 64KB */ - /* the following is the mask of allowed drives. By default units 2 and * 3 of both floppy controllers are disabled, because switching on the * motor of these drives causes system hangs on some PCI computers. drive @@ -3092,16 +3090,13 @@ static int raw_cmd_copyin(int cmd, void __user *param, *rcmd = NULL; loop: - ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL); - if (!ptr) - return -ENOMEM; + ptr = memdup_user(param, sizeof(*ptr)); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); *rcmd = ptr; - ret = copy_from_user(ptr, param, sizeof(*ptr)); ptr->next = NULL; ptr->buffer_length = 0; ptr->kernel_data = NULL; - if (ret) - return -EFAULT; param += sizeof(struct floppy_raw_cmd); if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE) return -EINVAL; @@ -3363,9 +3358,9 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) return 0; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = (long)bdev->bd_disk->private_data; + int drive = (long)disk->private_data; int type = ITYPE(drive_state[drive].fd_device); struct floppy_struct *g; int ret; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8fc7761397bd..567192e371a8 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3148,17 +3148,17 @@ static int mtip_block_compat_ioctl(struct block_device *dev, * that each partition is also 4KB aligned. Non-aligned partitions adversely * affects performance. * - * @dev Pointer to the block_device strucutre. + * @disk Pointer to the gendisk strucutre. * @geo Pointer to a hd_geometry structure. * * return value * 0 Operation completed successfully. * -ENOTTY An error occurred while reading the drive capacity. */ -static int mtip_block_getgeo(struct block_device *dev, +static int mtip_block_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct driver_data *dd = dev->bd_disk->private_data; + struct driver_data *dd = disk->private_data; sector_t capacity; if (!dd) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6463d0e8d0ce..1188f32a5e5e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -311,7 +311,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, if (args) { INIT_WORK(&args->work, nbd_dead_link_work); args->index = nbd->index; - queue_work(system_wq, &args->work); + queue_work(system_percpu_wq, &args->work); } } if (!nsock->dead) { @@ -1217,6 +1217,14 @@ static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, if (!sock) return NULL; + if (!sk_is_tcp(sock->sk) && + !sk_is_stream_unix(sock->sk)) { + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: should be TCP or UNIX.\n"); + *err = -EINVAL; + sockfd_put(sock); + return NULL; + } + if (sock->ops->shutdown == sock_no_shutdown) { dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); *err = -EINVAL; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 91642c9a3b29..f982027e8c85 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -223,7 +223,7 @@ MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed nu static unsigned long g_cache_size; module_param_named(cache_size, g_cache_size, ulong, 0444); -MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)"); +MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)"); static bool g_fua = true; module_param_named(fua, g_fua, bool, 0444); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index faafd7ff43d6..af0e21149dbc 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7389,7 +7389,7 @@ static int __init rbd_init(void) * The number of active work items is limited by the number of * rbd devices * queue depth, so leave @max_active at default. */ - rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); + rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!rbd_wq) { rc = -ENOMEM; goto err_out_slab; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 15627417f12e..f1409e54010a 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -942,11 +942,11 @@ static void rnbd_client_release(struct gendisk *gen) rnbd_clt_put_dev(dev); } -static int rnbd_client_getgeo(struct block_device *block_device, +static int rnbd_client_getgeo(struct gendisk *disk, struct hd_geometry *geo) { u64 size; - struct rnbd_clt_dev *dev = block_device->bd_disk->private_data; + struct rnbd_clt_dev *dev = disk->private_data; struct queue_limits *limit = &dev->queue->limits; size = dev->size * (limit->logical_block_size / SECTOR_SIZE); @@ -1809,7 +1809,7 @@ static int __init rnbd_client_init(void) unregister_blkdev(rnbd_client_major, "rnbd"); return err; } - rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0); + rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", WQ_PERCPU, 0); if (!rnbd_clt_wq) { pr_err("Failed to load module, alloc_workqueue failed.\n"); rnbd_clt_destroy_sysfs_files(); diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs deleted file mode 100644 index d07e76ae2c13..000000000000 --- a/drivers/block/rnull.rs +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -//! This is a Rust implementation of the C null block driver. -//! -//! Supported features: -//! -//! - blk-mq interface -//! - direct completion -//! - block size 4k -//! -//! The driver is not configurable. - -use kernel::{ - alloc::flags, - block::mq::{ - self, - gen_disk::{self, GenDisk}, - Operations, TagSet, - }, - error::Result, - new_mutex, pr_info, - prelude::*, - sync::{Arc, Mutex}, - types::ARef, -}; - -module! { - type: NullBlkModule, - name: "rnull_mod", - authors: ["Andreas Hindborg"], - description: "Rust implementation of the C null block driver", - license: "GPL v2", -} - -#[pin_data] -struct NullBlkModule { - #[pin] - _disk: Mutex<GenDisk<NullBlkDevice>>, -} - -impl kernel::InPlaceModule for NullBlkModule { - fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { - pr_info!("Rust null_blk loaded\n"); - - // Use a immediately-called closure as a stable `try` block - let disk = /* try */ (|| { - let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; - - gen_disk::GenDiskBuilder::new() - .capacity_sectors(4096 << 11) - .logical_block_size(4096)? - .physical_block_size(4096)? - .rotational(false) - .build(format_args!("rnullb{}", 0), tagset) - })(); - - try_pin_init!(Self { - _disk <- new_mutex!(disk?, "nullb:disk"), - }) - } -} - -struct NullBlkDevice; - -#[vtable] -impl Operations for NullBlkDevice { - #[inline(always)] - fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result { - mq::Request::end_ok(rq) - .map_err(|_e| kernel::error::code::EIO) - // We take no refcounts on the request, so we expect to be able to - // end the request. The request reference must be unique at this - // point, and so `end_ok` cannot fail. - .expect("Fatal error - expected to be able to end request"); - - Ok(()) - } - - fn commit_rqs() {} -} diff --git a/drivers/block/rnull/Kconfig b/drivers/block/rnull/Kconfig new file mode 100644 index 000000000000..7bc5b376c128 --- /dev/null +++ b/drivers/block/rnull/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Rust null block device driver configuration + +config BLK_DEV_RUST_NULL + tristate "Rust null block driver (Experimental)" + depends on RUST && CONFIGFS_FS + help + This is the Rust implementation of the null block driver. Like + the C version, the driver allows the user to create virutal block + devices that can be configured via various configuration options. + + If unsure, say N. diff --git a/drivers/block/rnull/Makefile b/drivers/block/rnull/Makefile new file mode 100644 index 000000000000..11cfa5e615dc --- /dev/null +++ b/drivers/block/rnull/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o +rnull_mod-y := rnull.o diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs new file mode 100644 index 000000000000..8498e9bae6fd --- /dev/null +++ b/drivers/block/rnull/configfs.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 + +use super::{NullBlkDevice, THIS_MODULE}; +use core::fmt::{Display, Write}; +use kernel::{ + block::mq::gen_disk::{GenDisk, GenDiskBuilder}, + c_str, + configfs::{self, AttributeOperations}, + configfs_attrs, new_mutex, + page::PAGE_SIZE, + prelude::*, + str::{kstrtobool_bytes, CString}, + sync::Mutex, +}; +use pin_init::PinInit; + +pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> { + let item_type = configfs_attrs! { + container: configfs::Subsystem<Config>, + data: Config, + child: DeviceConfig, + attributes: [ + features: 0, + ], + }; + + kernel::configfs::Subsystem::new(c_str!("rnull"), item_type, try_pin_init!(Config {})) +} + +#[pin_data] +pub(crate) struct Config {} + +#[vtable] +impl AttributeOperations<0> for Config { + type Data = Config; + + fn show(_this: &Config, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_str("blocksize,size,rotational,irqmode\n")?; + Ok(writer.bytes_written()) + } +} + +#[vtable] +impl configfs::GroupOperations for Config { + type Child = DeviceConfig; + + fn make_group( + &self, + name: &CStr, + ) -> Result<impl PinInit<configfs::Group<DeviceConfig>, Error>> { + let item_type = configfs_attrs! { + container: configfs::Group<DeviceConfig>, + data: DeviceConfig, + attributes: [ + // Named for compatibility with C null_blk + power: 0, + blocksize: 1, + rotational: 2, + size: 3, + irqmode: 4, + ], + }; + + Ok(configfs::Group::new( + name.try_into()?, + item_type, + // TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside + try_pin_init!( DeviceConfig { + data <- new_mutex!(DeviceConfigInner { + powered: false, + block_size: 4096, + rotational: false, + disk: None, + capacity_mib: 4096, + irq_mode: IRQMode::None, + name: name.try_into()?, + }), + }), + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum IRQMode { + None, + Soft, +} + +impl TryFrom<u8> for IRQMode { + type Error = kernel::error::Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 0 => Ok(Self::None), + 1 => Ok(Self::Soft), + _ => Err(EINVAL), + } + } +} + +impl Display for IRQMode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::None => f.write_str("0")?, + Self::Soft => f.write_str("1")?, + } + Ok(()) + } +} + +#[pin_data] +pub(crate) struct DeviceConfig { + #[pin] + data: Mutex<DeviceConfigInner>, +} + +#[pin_data] +struct DeviceConfigInner { + powered: bool, + name: CString, + block_size: u32, + rotational: bool, + capacity_mib: u64, + irq_mode: IRQMode, + disk: Option<GenDisk<NullBlkDevice>>, +} + +#[vtable] +impl configfs::AttributeOperations<0> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + + if this.data.lock().powered { + writer.write_str("1\n")?; + } else { + writer.write_str("0\n")?; + } + + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + let power_op = kstrtobool_bytes(page)?; + let mut guard = this.data.lock(); + + if !guard.powered && power_op { + guard.disk = Some(NullBlkDevice::new( + &guard.name, + guard.block_size, + guard.rotational, + guard.capacity_mib, + guard.irq_mode, + )?); + guard.powered = true; + } else if guard.powered && !power_op { + drop(guard.disk.take()); + guard.powered = false; + } + + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<1> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().block_size))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u32>().map_err(|_| EINVAL)?; + + GenDiskBuilder::validate_block_size(value)?; + this.data.lock().block_size = value; + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<2> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + + if this.data.lock().rotational { + writer.write_str("1\n")?; + } else { + writer.write_str("0\n")?; + } + + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + this.data.lock().rotational = kstrtobool_bytes(page)?; + + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<3> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().capacity_mib))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u64>().map_err(|_| EINVAL)?; + + this.data.lock().capacity_mib = value; + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<4> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().irq_mode))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u8>().map_err(|_| EINVAL)?; + + this.data.lock().irq_mode = IRQMode::try_from(value)?; + Ok(()) + } +} diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs new file mode 100644 index 000000000000..1ec694d7f1a6 --- /dev/null +++ b/drivers/block/rnull/rnull.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This is a Rust implementation of the C null block driver. + +mod configfs; + +use configfs::IRQMode; +use kernel::{ + block::{ + self, + mq::{ + self, + gen_disk::{self, GenDisk}, + Operations, TagSet, + }, + }, + error::Result, + pr_info, + prelude::*, + sync::Arc, + types::ARef, +}; +use pin_init::PinInit; + +module! { + type: NullBlkModule, + name: "rnull_mod", + authors: ["Andreas Hindborg"], + description: "Rust implementation of the C null block driver", + license: "GPL v2", +} + +#[pin_data] +struct NullBlkModule { + #[pin] + configfs_subsystem: kernel::configfs::Subsystem<configfs::Config>, +} + +impl kernel::InPlaceModule for NullBlkModule { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Rust null_blk loaded\n"); + + try_pin_init!(Self { + configfs_subsystem <- configfs::subsystem(), + }) + } +} + +struct NullBlkDevice; + +impl NullBlkDevice { + fn new( + name: &CStr, + block_size: u32, + rotational: bool, + capacity_mib: u64, + irq_mode: IRQMode, + ) -> Result<GenDisk<Self>> { + let tagset = Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?; + + let queue_data = Box::new(QueueData { irq_mode }, GFP_KERNEL)?; + + gen_disk::GenDiskBuilder::new() + .capacity_sectors(capacity_mib << (20 - block::SECTOR_SHIFT)) + .logical_block_size(block_size)? + .physical_block_size(block_size)? + .rotational(rotational) + .build(fmt!("{}", name.to_str()?), tagset, queue_data) + } +} + +struct QueueData { + irq_mode: IRQMode, +} + +#[vtable] +impl Operations for NullBlkDevice { + type QueueData = KBox<QueueData>; + + #[inline(always)] + fn queue_rq(queue_data: &QueueData, rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result { + match queue_data.irq_mode { + IRQMode::None => mq::Request::end_ok(rq) + .map_err(|_e| kernel::error::code::EIO) + // We take no refcounts on the request, so we expect to be able to + // end the request. The request reference must be unique at this + // point, and so `end_ok` cannot fail. + .expect("Fatal error - expected to be able to end request"), + IRQMode::Soft => mq::Request::complete(rq), + } + Ok(()) + } + + fn commit_rqs(_queue_data: &QueueData) {} + + fn complete(rq: ARef<mq::Request<Self>>) { + mq::Request::end_ok(rq) + .map_err(|_e| kernel::error::code::EIO) + // We take no refcounts on the request, so we expect to be able to + // end the request. The request reference must be unique at this + // point, and so `end_ok` cannot fail. + .expect("Fatal error - expected to be able to end request"); + } +} diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 7af21fe67671..db1fe9772a4d 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -119,9 +119,8 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) return vio_dring_avail(dr, VDC_TX_RING_SIZE); } -static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int vdc_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct gendisk *disk = bdev->bd_disk; sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; @@ -1189,7 +1188,7 @@ static void vdc_ldc_reset(struct vdc_port *port) } if (port->ldc_timeout) - mod_delayed_work(system_wq, &port->ldc_reset_timer_work, + mod_delayed_work(system_percpu_wq, &port->ldc_reset_timer_work, round_jiffies(jiffies + HZ * port->ldc_timeout)); mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ)); return; @@ -1217,7 +1216,7 @@ static int __init vdc_init(void) { int err; - sunvdc_wq = alloc_workqueue("sunvdc", 0, 0); + sunvdc_wq = alloc_workqueue("sunvdc", WQ_PERCPU, 0); if (!sunvdc_wq) return -ENOMEM; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index eda33c5eb5e2..416015947ae6 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -711,9 +711,9 @@ static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode, return -ENOTTY; } -static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int floppy_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct floppy_state *fs = bdev->bd_disk->private_data; + struct floppy_state *fs = disk->private_data; struct floppy_struct *g; int ret; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 67d4a867aec4..0c74a41a6753 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,7 +201,6 @@ struct ublk_queue { bool force_abort; bool canceling; bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ - unsigned short nr_io_ready; /* how many ios setup */ spinlock_t cancel_lock; struct ublk_device *dev; struct ublk_io ios[]; @@ -234,7 +233,7 @@ struct ublk_device { struct ublk_params params; struct completion completion; - unsigned int nr_queues_ready; + u32 nr_io_ready; bool unprivileged_daemons; struct mutex cancel_mutex; bool canceling; @@ -252,8 +251,7 @@ static void ublk_io_release(void *priv); static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, struct ublk_io *io, - size_t offset); + u16 q_id, u16 tag, struct ublk_io *io, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc * @@ -532,7 +530,8 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, #endif -static inline void __ublk_complete_rq(struct request *req); +static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, + bool need_map); static dev_t ublk_chr_devt; static const struct class ublk_chr_class = { @@ -664,22 +663,44 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; } +static inline bool ublk_dev_support_zero_copy(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq) { return ubq->flags & UBLK_F_AUTO_BUF_REG; } +static inline bool ublk_dev_support_auto_buf_reg(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_AUTO_BUF_REG; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { return ubq->flags & UBLK_F_USER_COPY; } +static inline bool ublk_dev_support_user_copy(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_USER_COPY; +} + static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) && !ublk_support_auto_buf_reg(ubq); } +static inline bool ublk_dev_need_map_io(const struct ublk_device *ub) +{ + return !ublk_dev_support_user_copy(ub) && + !ublk_dev_support_zero_copy(ub) && + !ublk_dev_support_auto_buf_reg(ub); +} + static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) { /* @@ -697,6 +718,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) ublk_support_auto_buf_reg(ubq); } +static inline bool ublk_dev_need_req_ref(const struct ublk_device *ub) +{ + return ublk_dev_support_user_copy(ub) || + ublk_dev_support_zero_copy(ub) || + ublk_dev_support_auto_buf_reg(ub); +} + static inline void ublk_init_req_ref(const struct ublk_queue *ubq, struct ublk_io *io) { @@ -711,8 +739,11 @@ static inline bool ublk_get_req_ref(struct ublk_io *io) static inline void ublk_put_req_ref(struct ublk_io *io, struct request *req) { - if (refcount_dec_and_test(&io->ref)) - __ublk_complete_rq(req); + if (!refcount_dec_and_test(&io->ref)) + return; + + /* ublk_need_map_io() and ublk_need_req_ref() are mutually exclusive */ + __ublk_complete_rq(req, io, false); } static inline bool ublk_sub_req_ref(struct ublk_io *io) @@ -728,6 +759,11 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_NEED_GET_DATA; } +static inline bool ublk_dev_need_get_data(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_NEED_GET_DATA; +} + /* Called in slow path only, keep it noinline for trace purpose */ static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub) { @@ -764,11 +800,9 @@ static inline int __ublk_queue_cmd_buf_size(int depth) return round_up(depth * sizeof(struct ublksrv_io_desc), PAGE_SIZE); } -static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) +static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub) { - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - - return __ublk_queue_cmd_buf_size(ubq->q_depth); + return __ublk_queue_cmd_buf_size(ub->dev_info.queue_depth); } static int ublk_max_cmd_buf_size(void) @@ -1019,13 +1053,13 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, return rq_bytes; } -static int ublk_unmap_io(const struct ublk_queue *ubq, +static int ublk_unmap_io(bool need_map, const struct request *req, const struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); - if (!ublk_need_map_io(ubq)) + if (!need_map) return rq_bytes; if (ublk_need_unmap_req(req)) { @@ -1072,13 +1106,8 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) { struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); struct ublk_io *io = &ubq->ios[req->tag]; - enum req_op op = req_op(req); u32 ublk_op; - if (!ublk_queue_is_zoned(ubq) && - (op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND)) - return BLK_STS_IOERR; - switch (req_op(req)) { case REQ_OP_READ: ublk_op = UBLK_IO_OP_READ; @@ -1117,10 +1146,9 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( } /* todo: handle partial completion */ -static inline void __ublk_complete_rq(struct request *req) +static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, + bool need_map) { - struct ublk_queue *ubq = req->mq_hctx->driver_data; - struct ublk_io *io = &ubq->ios[req->tag]; unsigned int unmapped_bytes; blk_status_t res = BLK_STS_OK; @@ -1144,7 +1172,7 @@ static inline void __ublk_complete_rq(struct request *req) goto exit; /* for READ request, writing data in iod->addr to rq buffers */ - unmapped_bytes = ublk_unmap_io(ubq, req, io); + unmapped_bytes = ublk_unmap_io(need_map, req, io); /* * Extremely impossible since we got data filled in just before @@ -1189,7 +1217,7 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ - io_uring_cmd_done(cmd, res, 0, issue_flags); + io_uring_cmd_done(cmd, res, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -1500,9 +1528,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) { int i; - /* All old ioucmds have to be completed */ - ubq->nr_io_ready = 0; - for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; @@ -1551,7 +1576,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; - ub->nr_queues_ready = 0; + ub->nr_io_ready = 0; ub->unprivileged_daemons = false; ub->ublksrv_tgid = -1; } @@ -1775,23 +1800,23 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) __func__, q_id, current->pid, vma->vm_start, phys_off, (unsigned long)sz); - if (sz != ublk_queue_cmd_buf_size(ub, q_id)) + if (sz != ublk_queue_cmd_buf_size(ub)) return -EINVAL; pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT; return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); } -static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, +static void __ublk_fail_req(struct ublk_device *ub, struct ublk_io *io, struct request *req) { WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + if (ublk_nosrv_should_reissue_outstanding(ub)) blk_mq_requeue_request(req, false); else { io->res = -EIO; - __ublk_complete_rq(req); + __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub)); } } @@ -1811,7 +1836,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) struct ublk_io *io = &ubq->ios[i]; if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) - __ublk_fail_req(ubq, io, io->req); + __ublk_fail_req(ub, io, io->req); } } @@ -1873,7 +1898,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, spin_unlock(&ubq->cancel_lock); if (!done) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags); + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags); } /* @@ -1916,9 +1941,11 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } -static inline bool ublk_queue_ready(struct ublk_queue *ubq) +static inline bool ublk_dev_ready(const struct ublk_device *ub) { - return ubq->nr_io_ready == ubq->q_depth; + u32 total = (u32)ub->dev_info.nr_hw_queues * ub->dev_info.queue_depth; + + return ub->nr_io_ready == total; } static void ublk_cancel_queue(struct ublk_queue *ubq) @@ -2042,16 +2069,14 @@ static void ublk_reset_io_flags(struct ublk_device *ub) } /* device can only be started after all IOs are ready */ -static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) +static void ublk_mark_io_ready(struct ublk_device *ub) __must_hold(&ub->mutex) { - ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) - ub->nr_queues_ready++; if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) ub->unprivileged_daemons = true; - if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { + ub->nr_io_ready++; + if (ublk_dev_ready(ub)) { /* now we are ready for handling ublk io request */ ublk_reset_io_flags(ub); complete_all(&ub->completion); @@ -2122,11 +2147,11 @@ ublk_fill_io_cmd(struct ublk_io *io, struct io_uring_cmd *cmd) } static inline int -ublk_config_io_buf(const struct ublk_queue *ubq, struct ublk_io *io, +ublk_config_io_buf(const struct ublk_device *ub, struct ublk_io *io, struct io_uring_cmd *cmd, unsigned long buf_addr, u16 *buf_idx) { - if (ublk_support_auto_buf_reg(ubq)) + if (ublk_dev_support_auto_buf_reg(ub)) return ublk_handle_auto_buf_reg(io, cmd, buf_idx); io->addr = buf_addr; @@ -2165,18 +2190,18 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, + struct ublk_device *ub, + u16 q_id, u16 tag, struct ublk_io *io, unsigned int index, unsigned int issue_flags) { - struct ublk_device *ub = cmd->file->private_data; struct request *req; int ret; - if (!ublk_support_zero_copy(ubq)) + if (!ublk_dev_support_zero_copy(ub)) return -EINVAL; - req = __ublk_check_and_get_req(ub, ubq, io, 0); + req = __ublk_check_and_get_req(ub, q_id, tag, io, 0); if (!req) return -EINVAL; @@ -2192,7 +2217,8 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, static int ublk_daemon_register_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, struct ublk_io *io, + struct ublk_device *ub, + u16 q_id, u16 tag, struct ublk_io *io, unsigned index, unsigned issue_flags) { unsigned new_registered_buffers; @@ -2205,9 +2231,10 @@ ublk_daemon_register_io_buf(struct io_uring_cmd *cmd, */ new_registered_buffers = io->task_registered_buffers + 1; if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT)) - return ublk_register_io_buf(cmd, ubq, io, index, issue_flags); + return ublk_register_io_buf(cmd, ub, q_id, tag, io, index, + issue_flags); - if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req)) + if (!ublk_dev_support_zero_copy(ub) || !ublk_rq_has_data(req)) return -EINVAL; ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index, @@ -2229,14 +2256,14 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, return io_buffer_unregister_bvec(cmd, index, issue_flags); } -static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr) +static int ublk_check_fetch_buf(const struct ublk_device *ub, __u64 buf_addr) { - if (ublk_need_map_io(ubq)) { + if (ublk_dev_need_map_io(ub)) { /* * FETCH_RQ has to provide IO buffer if NEED GET * DATA is not enabled */ - if (!buf_addr && !ublk_need_get_data(ubq)) + if (!buf_addr && !ublk_dev_need_get_data(ub)) return -EINVAL; } else if (buf_addr) { /* User copy requires addr to be unset */ @@ -2245,10 +2272,9 @@ static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr) return 0; } -static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, +static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub, struct ublk_io *io, __u64 buf_addr) { - struct ublk_device *ub = ubq->dev; int ret = 0; /* @@ -2257,8 +2283,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, * FETCH, so it is fine even for IO_URING_F_NONBLOCK. */ mutex_lock(&ub->mutex); - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ - if (ublk_queue_ready(ubq)) { + /* UBLK_IO_FETCH_REQ is only allowed before dev is setup */ + if (ublk_dev_ready(ub)) { ret = -EBUSY; goto out; } @@ -2272,28 +2298,28 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, buf_addr, NULL); + ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL); if (ret) goto out; WRITE_ONCE(io->task, get_task_struct(current)); - ublk_mark_io_ready(ub, ubq); + ublk_mark_io_ready(ub); out: mutex_unlock(&ub->mutex); return ret; } -static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq, +static int ublk_check_commit_and_fetch(const struct ublk_device *ub, struct ublk_io *io, __u64 buf_addr) { struct request *req = io->req; - if (ublk_need_map_io(ubq)) { + if (ublk_dev_need_map_io(ub)) { /* * COMMIT_AND_FETCH_REQ has to provide IO buffer if * NEED GET DATA is not enabled or it is Read IO. */ - if (!buf_addr && (!ublk_need_get_data(ubq) || + if (!buf_addr && (!ublk_dev_need_get_data(ub) || req_op(req) == REQ_OP_READ)) return -EINVAL; } else if (req_op(req) != REQ_OP_ZONE_APPEND && buf_addr) { @@ -2307,10 +2333,10 @@ static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq, return 0; } -static bool ublk_need_complete_req(const struct ublk_queue *ubq, +static bool ublk_need_complete_req(const struct ublk_device *ub, struct ublk_io *io) { - if (ublk_need_req_ref(ubq)) + if (ublk_dev_need_req_ref(ub)) return ublk_sub_req_ref(io); return true; } @@ -2333,23 +2359,28 @@ static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, return ublk_start_io(ubq, req, io); } -static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, - unsigned int issue_flags, - const struct ublksrv_io_cmd *ub_cmd) +static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, + unsigned int issue_flags) { + /* May point to userspace-mapped memory */ + const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe); u16 buf_idx = UBLK_INVALID_BUF_IDX; struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; struct ublk_io *io; u32 cmd_op = cmd->cmd_op; - unsigned tag = ub_cmd->tag; + u16 q_id = READ_ONCE(ub_src->q_id); + u16 tag = READ_ONCE(ub_src->tag); + s32 result = READ_ONCE(ub_src->result); + u64 addr = READ_ONCE(ub_src->addr); /* unioned with zone_append_lba */ struct request *req; int ret; bool compl; + WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED); + pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", - __func__, cmd->cmd_op, ub_cmd->q_id, tag, - ub_cmd->result); + __func__, cmd->cmd_op, q_id, tag, result); ret = ublk_check_cmd_op(cmd_op); if (ret) @@ -2360,25 +2391,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * so no need to validate the q_id, tag, or task */ if (_IOC_NR(cmd_op) == UBLK_IO_UNREGISTER_IO_BUF) - return ublk_unregister_io_buf(cmd, ub, ub_cmd->addr, - issue_flags); + return ublk_unregister_io_buf(cmd, ub, addr, issue_flags); ret = -EINVAL; - if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) + if (q_id >= ub->dev_info.nr_hw_queues) goto out; - ubq = ublk_get_queue(ub, ub_cmd->q_id); + ubq = ublk_get_queue(ub, q_id); - if (tag >= ubq->q_depth) + if (tag >= ub->dev_info.queue_depth) goto out; io = &ubq->ios[tag]; /* UBLK_IO_FETCH_REQ can be handled on any task, which sets io->task */ if (unlikely(_IOC_NR(cmd_op) == UBLK_IO_FETCH_REQ)) { - ret = ublk_check_fetch_buf(ubq, ub_cmd->addr); + ret = ublk_check_fetch_buf(ub, addr); if (ret) goto out; - ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + ret = ublk_fetch(cmd, ub, io, addr); if (ret) goto out; @@ -2392,8 +2422,8 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * so can be handled on any task */ if (_IOC_NR(cmd_op) == UBLK_IO_REGISTER_IO_BUF) - return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, - issue_flags); + return ublk_register_io_buf(cmd, ub, q_id, tag, io, + addr, issue_flags); goto out; } @@ -2414,24 +2444,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, switch (_IOC_NR(cmd_op)) { case UBLK_IO_REGISTER_IO_BUF: - return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr, + return ublk_daemon_register_io_buf(cmd, ub, q_id, tag, io, addr, issue_flags); case UBLK_IO_COMMIT_AND_FETCH_REQ: - ret = ublk_check_commit_and_fetch(ubq, io, ub_cmd->addr); + ret = ublk_check_commit_and_fetch(ub, io, addr); if (ret) goto out; - io->res = ub_cmd->result; + io->res = result; req = ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, &buf_idx); - compl = ublk_need_complete_req(ubq, io); + ret = ublk_config_io_buf(ub, io, cmd, addr, &buf_idx); + compl = ublk_need_complete_req(ub, io); /* can't touch 'ublk_io' any more */ if (buf_idx != UBLK_INVALID_BUF_IDX) io_buffer_unregister_bvec(cmd, buf_idx, issue_flags); if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = ub_cmd->zone_append_lba; + req->__sector = addr; if (compl) - __ublk_complete_rq(req); + __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub)); if (ret) goto out; @@ -2443,7 +2473,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * request */ req = ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, NULL); + ret = ublk_config_io_buf(ub, io, cmd, addr, NULL); WARN_ON_ONCE(ret); if (likely(ublk_get_data(ubq, io, req))) { __ublk_prep_compl_io_cmd(io, req); @@ -2463,16 +2493,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, struct ublk_io *io, size_t offset) + u16 q_id, u16 tag, struct ublk_io *io, size_t offset) { - unsigned tag = io - ubq->ios; struct request *req; /* * can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ, * which would overwrite it with io->cmd */ - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); if (!req) return NULL; @@ -2494,33 +2523,13 @@ fail_put: return NULL; } -static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, - unsigned int issue_flags) -{ - /* - * Not necessary for async retry, but let's keep it simple and always - * copy the values to avoid any potential reuse. - */ - const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe); - const struct ublksrv_io_cmd ub_cmd = { - .q_id = READ_ONCE(ub_src->q_id), - .tag = READ_ONCE(ub_src->tag), - .result = READ_ONCE(ub_src->result), - .addr = READ_ONCE(ub_src->addr) - }; - - WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED); - - return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd); -} - static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd, unsigned int issue_flags) { int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); if (ret != -EIOCBQUEUED) - io_uring_cmd_done(cmd, ret, 0, issue_flags); + io_uring_cmd_done(cmd, ret, issue_flags); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -2583,17 +2592,14 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, return ERR_PTR(-EINVAL); ubq = ublk_get_queue(ub, q_id); - if (!ubq) - return ERR_PTR(-EINVAL); - - if (!ublk_support_user_copy(ubq)) + if (!ublk_dev_support_user_copy(ub)) return ERR_PTR(-EACCES); - if (tag >= ubq->q_depth) + if (tag >= ub->dev_info.queue_depth) return ERR_PTR(-EINVAL); *io = &ubq->ios[tag]; - req = __ublk_check_and_get_req(ub, ubq, *io, buf_off); + req = __ublk_check_and_get_req(ub, q_id, tag, *io, buf_off); if (!req) return ERR_PTR(-EINVAL); @@ -2656,7 +2662,7 @@ static const struct file_operations ublk_ch_fops = { static void ublk_deinit_queue(struct ublk_device *ub, int q_id) { - int size = ublk_queue_cmd_buf_size(ub, q_id); + int size = ublk_queue_cmd_buf_size(ub); struct ublk_queue *ubq = ublk_get_queue(ub, q_id); int i; @@ -2683,7 +2689,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id) ubq->flags = ub->dev_info.flags; ubq->q_id = q_id; ubq->q_depth = ub->dev_info.queue_depth; - size = ublk_queue_cmd_buf_size(ub, q_id); + size = ublk_queue_cmd_buf_size(ub); ptr = (void *) __get_free_pages(gfp_flags, get_order(size)); if (!ptr) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e649fa67bac1..f061420dfb10 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -829,9 +829,9 @@ out: } /* We provide getgeo only to please some old bootloader/partitioning tools */ -static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +static int virtblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct virtio_blk *vblk = bd->bd_disk->private_data; + struct virtio_blk *vblk = disk->private_data; int ret = 0; mutex_lock(&vblk->vdev_mutex); @@ -853,7 +853,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) /* some standard values, similar to sd */ geo->heads = 1 << 6; geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + geo->cylinders = get_capacity(disk) >> 11; } out: mutex_unlock(&vblk->vdev_mutex); @@ -1682,7 +1682,7 @@ static int __init virtio_blk_init(void) { int error; - virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + virtblk_wq = alloc_workqueue("virtio-blk", WQ_PERCPU, 0); if (!virtblk_wq) return -ENOMEM; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5babe575c288..04fc6b552c04 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -493,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg) schedule_work(&rinfo->work); } -static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg) { /* We don't have real geometry info, but let's at least return values consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); + sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; hg->heads = 0xff; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f31652085adc..a43074657531 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1085,7 +1085,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page, work.entry = entry; INIT_WORK_ONSTACK(&work.work, zram_sync_read); - queue_work(system_unbound_wq, &work.work); + queue_work(system_dfl_wq, &work.work); flush_work(&work.work); destroy_work_on_stack(&work.work); @@ -1225,18 +1225,6 @@ static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) zram->comp_algs[prio] = alg; } -static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, - char *buf, ssize_t at) -{ - ssize_t sz; - - down_read(&zram->init_lock); - sz = zcomp_available_show(zram->comp_algs[prio], buf, at); - up_read(&zram->init_lock); - - return sz; -} - static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) { char *compressor; @@ -1387,8 +1375,12 @@ static ssize_t comp_algorithm_show(struct device *dev, char *buf) { struct zram *zram = dev_to_zram(dev); + ssize_t sz; - return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf, 0); + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); + up_read(&zram->init_lock); + return sz; } static ssize_t comp_algorithm_store(struct device *dev, @@ -1412,14 +1404,15 @@ static ssize_t recomp_algorithm_show(struct device *dev, ssize_t sz = 0; u32 prio; + down_read(&zram->init_lock); for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue; sz += sysfs_emit_at(buf, sz, "#%d: ", prio); - sz += __comp_algorithm_show(zram, prio, buf, sz); + sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); } - + up_read(&zram->init_lock); return sz; } |