diff options
Diffstat (limited to 'block/ioctl.c')
| -rw-r--r-- | block/ioctl.c | 1159 |
1 files changed, 845 insertions, 314 deletions
diff --git a/block/ioctl.c b/block/ioctl.c index a31d91d9bc5a..61feed686418 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,4 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/capability.h> +#include <linux/compat.h> #include <linux/blkdev.h> #include <linux/export.h> #include <linux/gfp.h> @@ -7,425 +9,954 @@ #include <linux/backing-dev.h> #include <linux/fs.h> #include <linux/blktrace_api.h> -#include <asm/uaccess.h> - -static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) +#include <linux/pr.h> +#include <linux/uaccess.h> +#include <linux/pagemap.h> +#include <linux/io_uring/cmd.h> +#include <linux/blk-integrity.h> +#include <uapi/linux/blkdev.h> +#include "blk.h" +#include "blk-crypto-internal.h" + +static int blkpg_do_ioctl(struct block_device *bdev, + struct blkpg_partition __user *upart, int op) { - struct block_device *bdevp; - struct gendisk *disk; - struct hd_struct *part, *lpart; - struct blkpg_ioctl_arg a; + struct gendisk *disk = bdev->bd_disk; struct blkpg_partition p; - struct disk_part_iter piter; - long long start, length; - int partno; + sector_t start, length, capacity, end; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) - return -EFAULT; - if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition))) + if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; - disk = bdev->bd_disk; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return -EINVAL; - partno = p.pno; - if (partno <= 0) + + if (p.pno <= 0) return -EINVAL; - switch (a.op) { - case BLKPG_ADD_PARTITION: - start = p.start >> 9; - length = p.length >> 9; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) == sizeof(long) && - sizeof(long long) > sizeof(long)) { - long pstart = start, plength = length; - if (pstart != start || plength != length - || pstart < 0 || plength < 0 || partno > 65535) - return -EINVAL; - } - 
mutex_lock(&bdev->bd_mutex); - - /* overlap? */ - disk_part_iter_init(&piter, disk, - DISK_PITER_INCL_EMPTY); - while ((part = disk_part_iter_next(&piter))) { - if (!(start + length <= part->start_sect || - start >= part->start_sect + part->nr_sects)) { - disk_part_iter_exit(&piter); - mutex_unlock(&bdev->bd_mutex); - return -EBUSY; - } - } - disk_part_iter_exit(&piter); - - /* all seems OK */ - part = add_partition(disk, partno, start, length, - ADDPART_FLAG_NONE, NULL); - mutex_unlock(&bdev->bd_mutex); - return IS_ERR(part) ? PTR_ERR(part) : 0; - case BLKPG_DEL_PARTITION: - part = disk_get_part(disk, partno); - if (!part) - return -ENXIO; - - bdevp = bdget(part_devt(part)); - disk_put_part(part); - if (!bdevp) - return -ENOMEM; - - mutex_lock(&bdevp->bd_mutex); - if (bdevp->bd_openers) { - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - return -EBUSY; - } - /* all seems OK */ - fsync_bdev(bdevp); - invalidate_bdev(bdevp); - - mutex_lock_nested(&bdev->bd_mutex, 1); - delete_partition(disk, partno); - mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - - return 0; - case BLKPG_RESIZE_PARTITION: - start = p.start >> 9; - /* new length of partition in bytes */ - length = p.length >> 9; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) == sizeof(long) && - sizeof(long long) > sizeof(long)) { - long pstart = start, plength = length; - if (pstart != start || plength != length - || pstart < 0 || plength < 0) - return -EINVAL; - } - part = disk_get_part(disk, partno); - if (!part) - return -ENXIO; - bdevp = bdget(part_devt(part)); - if (!bdevp) { - disk_put_part(part); - return -ENOMEM; - } - mutex_lock(&bdevp->bd_mutex); - mutex_lock_nested(&bdev->bd_mutex, 1); - if (start != part->start_sect) { - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return -EINVAL; - } - /* overlap? 
*/ - disk_part_iter_init(&piter, disk, - DISK_PITER_INCL_EMPTY); - while ((lpart = disk_part_iter_next(&piter))) { - if (lpart->partno != partno && - !(start + length <= lpart->start_sect || - start >= lpart->start_sect + lpart->nr_sects) - ) { - disk_part_iter_exit(&piter); - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return -EBUSY; - } - } - disk_part_iter_exit(&piter); - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, p.length); - mutex_unlock(&bdevp->bd_mutex); - mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); - disk_put_part(part); - return 0; - default: - return -EINVAL; + if (op == BLKPG_DEL_PARTITION) + return bdev_del_partition(disk, p.pno); + + if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) + return -EINVAL; + /* Check that the partition is aligned to the block size */ + if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) + return -EINVAL; + + start = p.start >> SECTOR_SHIFT; + length = p.length >> SECTOR_SHIFT; + capacity = get_capacity(disk); + + if (check_add_overflow(start, length, &end)) + return -EINVAL; + + if (start >= capacity || end > capacity) + return -EINVAL; + + switch (op) { + case BLKPG_ADD_PARTITION: + return bdev_add_partition(disk, p.pno, start, length); + case BLKPG_RESIZE_PARTITION: + return bdev_resize_partition(disk, p.pno, start, length); + default: + return -EINVAL; } } -static int blkdev_reread_part(struct block_device *bdev) +static int blkpg_ioctl(struct block_device *bdev, + struct blkpg_ioctl_arg __user *arg) { - struct gendisk *disk = bdev->bd_disk; - int res; + struct blkpg_partition __user *udata; + int op; - if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (!mutex_trylock(&bdev->bd_mutex)) - return -EBUSY; - res = rescan_partitions(disk, bdev); - mutex_unlock(&bdev->bd_mutex); - return res; + if (get_user(op, 
&arg->op) || get_user(udata, &arg->data)) + return -EFAULT; + + return blkpg_do_ioctl(bdev, udata, op); } -static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, - uint64_t len, int secure) +#ifdef CONFIG_COMPAT +struct compat_blkpg_ioctl_arg { + compat_int_t op; + compat_int_t flags; + compat_int_t datalen; + compat_caddr_t data; +}; + +static int compat_blkpg_ioctl(struct block_device *bdev, + struct compat_blkpg_ioctl_arg __user *arg) { - unsigned long flags = 0; + compat_caddr_t udata; + int op; - if (start & 511) + if (get_user(op, &arg->op) || get_user(udata, &arg->data)) + return -EFAULT; + + return blkpg_do_ioctl(bdev, compat_ptr(udata), op); +} +#endif + +/* + * Check that [start, start + len) is a valid range from the block device's + * perspective, including verifying that it can be correctly translated into + * logical block addresses. + */ +static int blk_validate_byte_range(struct block_device *bdev, + uint64_t start, uint64_t len) +{ + unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; + uint64_t end; + + if ((start | len) & bs_mask) return -EINVAL; - if (len & 511) + if (!len) + return -EINVAL; + if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) return -EINVAL; - start >>= 9; - len >>= 9; - if (start + len > (i_size_read(bdev->bd_inode) >> 9)) + return 0; +} + +static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, + unsigned long arg) +{ + uint64_t range[2], start, len; + struct bio *prev = NULL, *bio; + sector_t sector, nr_sects; + struct blk_plug plug; + int err; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + start = range[0]; + len = range[1]; + + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + + if (!(mode & BLK_OPEN_WRITE)) + return -EBADF; + if (bdev_read_only(bdev)) + return -EPERM; + err = blk_validate_byte_range(bdev, start, len); + if (err) + return err; + + inode_lock(bdev->bd_mapping->host); + 
filemap_invalidate_lock(bdev->bd_mapping); + err = truncate_bdev_range(bdev, mode, start, start + len - 1); + if (err) + goto fail; + + sector = start >> SECTOR_SHIFT; + nr_sects = len >> SECTOR_SHIFT; + + blk_start_plug(&plug); + while (1) { + if (fatal_signal_pending(current)) { + if (prev) + bio_await_chain(prev); + err = -EINTR; + goto out_unplug; + } + bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, + GFP_KERNEL); + if (!bio) + break; + prev = bio_chain_and_submit(prev, bio); + } + if (prev) { + err = submit_bio_wait(prev); + if (err == -EOPNOTSUPP) + err = 0; + bio_put(prev); + } +out_unplug: + blk_finish_plug(&plug); +fail: + filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + return err; +} + +static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, + void __user *argp) +{ + uint64_t start, len, end; + uint64_t range[2]; + int err; + + if (!(mode & BLK_OPEN_WRITE)) + return -EBADF; + if (!bdev_max_secure_erase_sectors(bdev)) + return -EOPNOTSUPP; + if (copy_from_user(range, argp, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; + if ((start & 511) || (len & 511)) + return -EINVAL; + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; - if (secure) - flags |= BLKDEV_DISCARD_SECURE; - return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); + + inode_lock(bdev->bd_mapping->host); + filemap_invalidate_lock(bdev->bd_mapping); + err = truncate_bdev_range(bdev, mode, start, end - 1); + if (!err) + err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, + GFP_KERNEL); + filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + return err; } -static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, - uint64_t len) + +static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, + unsigned long arg) { + uint64_t range[2]; + uint64_t start, end, len; + int err; + + if (!(mode & 
BLK_OPEN_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; + end = start + len - 1; + if (start & 511) return -EINVAL; if (len & 511) return -EINVAL; - start >>= 9; - len >>= 9; - - if (start + len > (i_size_read(bdev->bd_inode) >> 9)) + if (end >= (uint64_t)bdev_nr_bytes(bdev)) return -EINVAL; + if (end < start) + return -EINVAL; + + /* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); + filemap_invalidate_lock(bdev->bd_mapping); + err = truncate_bdev_range(bdev, mode, start, end); + if (err) + goto fail; - return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); + err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, + BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); + +fail: + filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + return err; } -static int put_ushort(unsigned long arg, unsigned short val) +static int put_ushort(unsigned short __user *argp, unsigned short val) { - return put_user(val, (unsigned short __user *)arg); + return put_user(val, argp); } -static int put_int(unsigned long arg, int val) +static int put_int(int __user *argp, int val) { - return put_user(val, (int __user *)arg); + return put_user(val, argp); } -static int put_uint(unsigned long arg, unsigned int val) +static int put_uint(unsigned int __user *argp, unsigned int val) { - return put_user(val, (unsigned int __user *)arg); + return put_user(val, argp); } -static int put_long(unsigned long arg, long val) +static int put_long(long __user *argp, long val) { - return put_user(val, (long __user *)arg); + return put_user(val, argp); } -static int put_ulong(unsigned long arg, unsigned long val) +static int put_ulong(unsigned long __user *argp, unsigned long val) { - return put_user(val, (unsigned long __user *)arg); + return put_user(val, argp); } -static int put_u64(unsigned long arg, u64 val) +static int 
put_u64(u64 __user *argp, u64 val) { - return put_user(val, (u64 __user *)arg); + return put_user(val, argp); } -int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, +#ifdef CONFIG_COMPAT +static int compat_put_long(compat_long_t __user *argp, long val) +{ + return put_user(val, argp); +} + +static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) +{ + return put_user(val, argp); +} +#endif + +#ifdef CONFIG_COMPAT +/* + * This is the equivalent of compat_ptr_ioctl(), to be used by block + * drivers that implement only commands that are completely compatible + * between 32-bit and 64-bit user space + */ +int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned cmd, unsigned long arg) { struct gendisk *disk = bdev->bd_disk; if (disk->fops->ioctl) - return disk->fops->ioctl(bdev, mode, cmd, arg); + return disk->fops->ioctl(bdev, mode, cmd, + (unsigned long)compat_ptr(arg)); - return -ENOTTY; + return -ENOIOCTLCMD; } -/* - * For the record: _GPL here is only because somebody decided to slap it - * on the previous export. Sheer idiocy, since it wasn't copyrightable - * at all and could be open-coded without any exports by anybody who cares. - */ -EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); +EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); +#endif -/* - * Is it an unrecognized ioctl? The correct returns are either - * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a - * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl - * code before returning. - * - * Confused drivers sometimes return EINVAL, which is wrong. It - * means "I understood the ioctl command, but the parameters to - * it were wrong". - * - * We should aim to just fix the broken drivers, the EINVAL case - * should go away. 
- */ -static inline int is_unrecognized_ioctl(int ret) +static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) { - return ret == -EINVAL || - ret == -ENOTTY || - ret == -ENOIOCTLCMD; + /* no sense to make reservations for partitions */ + if (bdev_is_partition(bdev)) + return false; + + if (capable(CAP_SYS_ADMIN)) + return true; + /* + * Only allow unprivileged reservations if the file descriptor is open + * for writing. + */ + return mode & BLK_OPEN_WRITE; } -/* - * always keep this in sync with compat_blkdev_ioctl() - */ -int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, - unsigned long arg) +static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, + struct pr_registration __user *arg) { - struct gendisk *disk = bdev->bd_disk; - struct backing_dev_info *bdi; - loff_t size; - int ret, n; + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_registration reg; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_register) + return -EOPNOTSUPP; + if (copy_from_user(&reg, arg, sizeof(reg))) + return -EFAULT; - switch(cmd) { - case BLKFLSBUF: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + if (reg.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); +} - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; +static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_reserve) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; - fsync_bdev(bdev); - invalidate_bdev(bdev); - return 0; + if (rsv.flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); +} - case 
BLKROSET: - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (get_user(n, (int __user *)(arg))) - return -EFAULT; - set_device_ro(bdev, n); - return 0; +static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, + struct pr_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_reservation rsv; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_release) + return -EOPNOTSUPP; + if (copy_from_user(&rsv, arg, sizeof(rsv))) + return -EFAULT; - case BLKDISCARD: - case BLKSECDISCARD: { - uint64_t range[2]; + if (rsv.flags) + return -EOPNOTSUPP; + return ops->pr_release(bdev, rsv.key, rsv.type); +} - if (!(mode & FMODE_WRITE)) - return -EBADF; +static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, + struct pr_preempt __user *arg, bool abort) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_preempt p; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_preempt) + return -EOPNOTSUPP; + if (copy_from_user(&p, arg, sizeof(p))) + return -EFAULT; - if (copy_from_user(range, (void __user *)arg, sizeof(range))) - return -EFAULT; + if (p.flags) + return -EOPNOTSUPP; + return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); +} - return blk_ioctl_discard(bdev, range[0], range[1], - cmd == BLKSECDISCARD); +static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, + struct pr_clear __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_clear c; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_clear) + return -EOPNOTSUPP; + if (copy_from_user(&c, arg, sizeof(c))) + return -EFAULT; + + if (c.flags) + return -EOPNOTSUPP; + return ops->pr_clear(bdev, c.key); +} + +static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, + struct 
pr_read_keys __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_keys *keys_info; + struct pr_read_keys read_keys; + u64 __user *keys_ptr; + size_t keys_info_len; + size_t keys_copy_len; + int ret; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_read_keys) + return -EOPNOTSUPP; + + if (copy_from_user(&read_keys, arg, sizeof(read_keys))) + return -EFAULT; + + keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); + if (keys_info_len == SIZE_MAX) + return -EINVAL; + + keys_info = kzalloc(keys_info_len, GFP_KERNEL); + if (!keys_info) + return -ENOMEM; + + keys_info->num_keys = read_keys.num_keys; + + ret = ops->pr_read_keys(bdev, keys_info); + if (ret) + goto out; + + /* Copy out individual keys */ + keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); + keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * + sizeof(keys_info->keys[0]); + + if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) { + ret = -EFAULT; + goto out; } - case BLKZEROOUT: { - uint64_t range[2]; - if (!(mode & FMODE_WRITE)) - return -EBADF; + /* Copy out the arg struct */ + read_keys.generation = keys_info->generation; + read_keys.num_keys = keys_info->num_keys; + + if (copy_to_user(arg, &read_keys, sizeof(read_keys))) + ret = -EFAULT; +out: + kfree(keys_info); + return ret; +} + +static int blkdev_pr_read_reservation(struct block_device *bdev, + blk_mode_t mode, struct pr_read_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_held_reservation rsv = {}; + struct pr_read_reservation out = {}; + int ret; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_read_reservation) + return -EOPNOTSUPP; + + ret = ops->pr_read_reservation(bdev, &rsv); + if (ret) + return ret; + + out.key = rsv.key; + out.generation = rsv.generation; + out.type = rsv.type; - if (copy_from_user(range, (void __user *)arg, sizeof(range))) - return -EFAULT; + if 
(copy_to_user(arg, &out, sizeof(out))) + return -EFAULT; + return 0; +} + +static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, + unsigned long arg) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; - return blk_ioctl_zeroout(bdev, range[0], range[1]); + mutex_lock(&bdev->bd_holder_lock); + if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) + bdev->bd_holder_ops->sync(bdev); + else { + mutex_unlock(&bdev->bd_holder_lock); + sync_blockdev(bdev); } - case HDIO_GETGEO: { - struct hd_geometry geo; + invalidate_bdev(bdev); + return 0; +} - if (!arg) - return -EINVAL; - if (!disk->fops->getgeo) - return -ENOTTY; - - /* - * We need to set the startsect first, the driver may - * want to override it. - */ - memset(&geo, 0, sizeof(geo)); - geo.start = get_start_sect(bdev); - ret = disk->fops->getgeo(bdev, &geo); +static int blkdev_roset(struct block_device *bdev, unsigned cmd, + unsigned long arg) +{ + int ret, n; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (get_user(n, (int __user *)arg)) + return -EFAULT; + if (bdev->bd_disk->fops->set_read_only) { + ret = bdev->bd_disk->fops->set_read_only(bdev, n); if (ret) return ret; - if (copy_to_user((struct hd_geometry __user *)arg, &geo, - sizeof(geo))) - return -EFAULT; - return 0; } - case BLKRAGET: - case BLKFRAGET: - if (!arg) - return -EINVAL; - bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; - return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); + if (n) + bdev_set_flag(bdev, BD_READ_ONLY); + else + bdev_clear_flag(bdev, BD_READ_ONLY); + return 0; +} + +static int blkdev_getgeo(struct block_device *bdev, + struct hd_geometry __user *argp) +{ + struct gendisk *disk = bdev->bd_disk; + struct hd_geometry geo; + int ret; + + if (!argp) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + /* + * We need to set the startsect first, the driver may + * want to override it. 
+ */ + memset(&geo, 0, sizeof(geo)); + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(disk, &geo); + if (ret) + return ret; + if (copy_to_user(argp, &geo, sizeof(geo))) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_COMPAT +struct compat_hd_geometry { + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + u32 start; +}; + +static int compat_hdio_getgeo(struct block_device *bdev, + struct compat_hd_geometry __user *ugeo) +{ + struct gendisk *disk = bdev->bd_disk; + struct hd_geometry geo; + int ret; + + if (!ugeo) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + memset(&geo, 0, sizeof(geo)); + /* + * We need to set the startsect first, the driver may + * want to override it. + */ + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(disk, &geo); + if (ret) + return ret; + + ret = copy_to_user(ugeo, &geo, 4); + ret |= put_user(geo.start, &ugeo->start); + if (ret) + ret = -EFAULT; + + return ret; +} +#endif + +/* set the logical block size */ +static int blkdev_bszset(struct file *file, blk_mode_t mode, + int __user *argp) +{ + // this one might be file_inode(file)->i_rdev - a rare valid + // use of file_inode() for those. + dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; + struct file *excl_file; + int ret, n; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!argp) + return -EINVAL; + if (get_user(n, argp)) + return -EFAULT; + + if (mode & BLK_OPEN_EXCL) + return set_blocksize(file, n); + + excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); + if (IS_ERR(excl_file)) + return -EBUSY; + ret = set_blocksize(excl_file, n); + fput(excl_file); + return ret; +} + +/* + * Common commands that are handled the same way on native and compat + * user space. Note the separate arg/argp parameters that are needed + * to deal with the compat_ptr() conversion. 
+ */ +static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, + unsigned int cmd, unsigned long arg, + void __user *argp) +{ + unsigned int max_sectors; + + switch (cmd) { + case BLKFLSBUF: + return blkdev_flushbuf(bdev, cmd, arg); + case BLKROSET: + return blkdev_roset(bdev, cmd, arg); + case BLKDISCARD: + return blk_ioctl_discard(bdev, mode, arg); + case BLKSECDISCARD: + return blk_ioctl_secure_erase(bdev, mode, argp); + case BLKZEROOUT: + return blk_ioctl_zeroout(bdev, mode, arg); + case BLKGETDISKSEQ: + return put_u64(argp, bdev->bd_disk->diskseq); + case BLKREPORTZONE: + case BLKREPORTZONEV2: + return blkdev_report_zones_ioctl(bdev, cmd, arg); + case BLKRESETZONE: + case BLKOPENZONE: + case BLKCLOSEZONE: + case BLKFINISHZONE: + return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); + case BLKGETZONESZ: + return put_uint(argp, bdev_zone_sectors(bdev)); + case BLKGETNRZONES: + return put_uint(argp, bdev_nr_zones(bdev)); case BLKROGET: - return put_int(arg, bdev_read_only(bdev) != 0); - case BLKBSZGET: /* get block device soft block size (cf. 
BLKSSZGET) */ - return put_int(arg, block_size(bdev)); + return put_int(argp, bdev_read_only(bdev) != 0); case BLKSSZGET: /* get block device logical block size */ - return put_int(arg, bdev_logical_block_size(bdev)); + return put_int(argp, bdev_logical_block_size(bdev)); case BLKPBSZGET: /* get block device physical block size */ - return put_uint(arg, bdev_physical_block_size(bdev)); + return put_uint(argp, bdev_physical_block_size(bdev)); case BLKIOMIN: - return put_uint(arg, bdev_io_min(bdev)); + return put_uint(argp, bdev_io_min(bdev)); case BLKIOOPT: - return put_uint(arg, bdev_io_opt(bdev)); + return put_uint(argp, bdev_io_opt(bdev)); case BLKALIGNOFF: - return put_int(arg, bdev_alignment_offset(bdev)); + return put_int(argp, bdev_alignment_offset(bdev)); case BLKDISCARDZEROES: - return put_uint(arg, bdev_discard_zeroes_data(bdev)); + return put_uint(argp, 0); case BLKSECTGET: - return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return put_ushort(argp, max_sectors); case BLKROTATIONAL: - return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); + return put_ushort(argp, !bdev_nonrot(bdev)); case BLKRASET: case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; - bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; + bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; - case BLKBSZSET: - /* set the logical block size */ + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!arg) + if (bdev_is_partition(bdev)) return -EINVAL; - if (get_user(n, (int __user *) arg)) - return -EFAULT; - if (!(mode & FMODE_EXCL)) { - bdgrab(bdev); - if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) - return -EBUSY; - } - ret = set_blocksize(bdev, n); - if (!(mode & FMODE_EXCL)) - blkdev_put(bdev, mode | FMODE_EXCL); - return ret; + return 
disk_scan_partitions(bdev->bd_disk, + mode | BLK_OPEN_STRICT_SCAN); + case BLKTRACESTART: + case BLKTRACESTOP: + case BLKTRACETEARDOWN: + return blk_trace_ioctl(bdev, cmd, argp); + case BLKCRYPTOIMPORTKEY: + case BLKCRYPTOGENERATEKEY: + case BLKCRYPTOPREPAREKEY: + return blk_crypto_ioctl(bdev, cmd, argp); + case IOC_PR_REGISTER: + return blkdev_pr_register(bdev, mode, argp); + case IOC_PR_RESERVE: + return blkdev_pr_reserve(bdev, mode, argp); + case IOC_PR_RELEASE: + return blkdev_pr_release(bdev, mode, argp); + case IOC_PR_PREEMPT: + return blkdev_pr_preempt(bdev, mode, argp, false); + case IOC_PR_PREEMPT_ABORT: + return blkdev_pr_preempt(bdev, mode, argp, true); + case IOC_PR_CLEAR: + return blkdev_pr_clear(bdev, mode, argp); + case IOC_PR_READ_KEYS: + return blkdev_pr_read_keys(bdev, mode, argp); + case IOC_PR_READ_RESERVATION: + return blkdev_pr_read_reservation(bdev, mode, argp); + default: + return blk_get_meta_cap(bdev, cmd, argp); + } +} + +/* + * Always keep this in sync with compat_blkdev_ioctl() + * to handle all incompatible commands in both functions. 
+ * + * New commands must be compatible and go into blkdev_common_ioctl + */ +long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + struct block_device *bdev = I_BDEV(file->f_mapping->host); + void __user *argp = (void __user *)arg; + blk_mode_t mode = file_to_blk_mode(file); + int ret; + + switch (cmd) { + /* These need separate implementations for the data structure */ + case HDIO_GETGEO: + return blkdev_getgeo(bdev, argp); case BLKPG: - ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); - break; - case BLKRRPART: - ret = blkdev_reread_part(bdev); - break; + return blkpg_ioctl(bdev, argp); + + /* Compat mode returns 32-bit data instead of 'long' */ + case BLKRAGET: + case BLKFRAGET: + if (!argp) + return -EINVAL; + return put_long(argp, + (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: - size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) + if (bdev_nr_sectors(bdev) > ~0UL) return -EFBIG; - return put_ulong(arg, size >> 9); + return put_ulong(argp, bdev_nr_sectors(bdev)); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET: /* get block device soft block size (cf. 
BLKSSZGET) */ + return put_int(argp, block_size(bdev)); + case BLKBSZSET: + return blkdev_bszset(file, mode, argp); case BLKGETSIZE64: - return put_u64(arg, i_size_read(bdev->bd_inode)); - case BLKTRACESTART: - case BLKTRACESTOP: + return put_u64(argp, bdev_nr_bytes(bdev)); + + /* Incompatible alignment on i386 */ case BLKTRACESETUP: - case BLKTRACETEARDOWN: - ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg); + case BLKTRACESETUP2: + return blk_trace_ioctl(bdev, cmd, argp); + default: break; + } + + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); + if (ret != -ENOIOCTLCMD) + return ret; + + if (!bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); +} + +#ifdef CONFIG_COMPAT + +#define BLKBSZGET_32 _IOR(0x12, 112, int) +#define BLKBSZSET_32 _IOW(0x12, 113, int) +#define BLKGETSIZE64_32 _IOR(0x12, 114, int) + +/* Most of the generic ioctls are handled in the normal fallback path. + This assumes the blkdev's low level compat_ioctl always returns + ENOIOCTLCMD for unknown ioctls. 
*/ +long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + int ret; + void __user *argp = compat_ptr(arg); + struct block_device *bdev = I_BDEV(file->f_mapping->host); + struct gendisk *disk = bdev->bd_disk; + blk_mode_t mode = file_to_blk_mode(file); + + switch (cmd) { + /* These need separate implementations for the data structure */ + case HDIO_GETGEO: + return compat_hdio_getgeo(bdev, argp); + case BLKPG: + return compat_blkpg_ioctl(bdev, argp); + + /* Compat mode returns 32-bit data instead of 'long' */ + case BLKRAGET: + case BLKFRAGET: + if (!argp) + return -EINVAL; + return compat_put_long(argp, + (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); + case BLKGETSIZE: + if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) + return -EFBIG; + return compat_put_ulong(argp, bdev_nr_sectors(bdev)); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ + return put_int(argp, bdev_logical_block_size(bdev)); + case BLKBSZSET_32: + return blkdev_bszset(file, mode, argp); + case BLKGETSIZE64_32: + return put_u64(argp, bdev_nr_bytes(bdev)); + + /* Incompatible alignment on i386 */ + case BLKTRACESETUP32: + return blk_trace_ioctl(bdev, cmd, argp); default: - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + break; } + + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); + if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) + ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); + return ret; } -EXPORT_SYMBOL_GPL(blkdev_ioctl); +#endif + +struct blk_iou_cmd { + int res; + bool nowait; +}; + +static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw) +{ + struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (bic->res == -EAGAIN && bic->nowait) + io_uring_cmd_issue_blocking(cmd); + else + io_uring_cmd_done(cmd, bic->res, + IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); +} + 
+static void bio_cmd_bio_end_io(struct bio *bio) +{ + struct io_uring_cmd *cmd = bio->bi_private; + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (unlikely(bio->bi_status) && !bic->res) + bic->res = blk_status_to_errno(bio->bi_status); + + io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); + bio_put(bio); +} + +static int blkdev_cmd_discard(struct io_uring_cmd *cmd, + struct block_device *bdev, + uint64_t start, uint64_t len, bool nowait) +{ + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; + sector_t sector = start >> SECTOR_SHIFT; + sector_t nr_sects = len >> SECTOR_SHIFT; + struct bio *prev = NULL, *bio; + int err; + + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) + return -EBADF; + if (bdev_read_only(bdev)) + return -EPERM; + err = blk_validate_byte_range(bdev, start, len); + if (err) + return err; + + err = filemap_invalidate_pages(bdev->bd_mapping, start, + start + len - 1, nowait); + if (err) + return err; + + while (true) { + bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp); + if (!bio) + break; + if (nowait) { + /* + * Don't allow multi-bio non-blocking submissions as + * subsequent bios may fail but we won't get a direct + * indication of that. Normally, the caller should + * retry from a blocking context. 
+ */ + if (unlikely(nr_sects)) { + bio_put(bio); + return -EAGAIN; + } + bio->bi_opf |= REQ_NOWAIT; + } + + prev = bio_chain_and_submit(prev, bio); + } + if (unlikely(!prev)) + return -EAGAIN; + if (unlikely(nr_sects)) + bic->res = -EAGAIN; + + prev->bi_private = cmd; + prev->bi_end_io = bio_cmd_bio_end_io; + submit_bio(prev); + return -EIOCBQUEUED; +} + +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + const struct io_uring_sqe *sqe = cmd->sqe; + u32 cmd_op = cmd->cmd_op; + uint64_t start, len; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->res = 0; + bic->nowait = issue_flags & IO_URING_F_NONBLOCK; + + start = READ_ONCE(sqe->addr); + len = READ_ONCE(sqe->addr3); + + switch (cmd_op) { + case BLOCK_URING_CMD_DISCARD: + return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + } + return -EINVAL; +} |
