diff options
Diffstat (limited to 'block/genhd.c')
| -rw-r--r-- | block/genhd.c | 750 |
1 files changed, 421 insertions, 329 deletions
diff --git a/block/genhd.c b/block/genhd.c index 23cf83b3331c..69c75117ba2c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -25,8 +25,9 @@ #include <linux/pm_runtime.h> #include <linux/badblocks.h> #include <linux/part_stat.h> -#include "blk-throttle.h" +#include <linux/blktrace_api.h> +#include "blk-throttle.h" #include "blk.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" @@ -57,12 +58,14 @@ static DEFINE_IDA(ext_devt_ida); void set_capacity(struct gendisk *disk, sector_t sectors) { - struct block_device *bdev = disk->part0; + if (sectors > BLK_DEV_MAX_SECTORS) { + pr_warn_once("%s: truncate capacity from %lld to %lld\n", + disk->disk_name, sectors, + BLK_DEV_MAX_SECTORS); + sectors = BLK_DEV_MAX_SECTORS; + } - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - bdev->bd_nr_sectors = sectors; - spin_unlock(&bdev->bd_size_lock); + bdev_set_nr_sectors(disk->part0, sectors); } EXPORT_SYMBOL(set_capacity); @@ -87,7 +90,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) (disk->flags & GENHD_FL_HIDDEN)) return false; - pr_info("%s: detected capacity change from %lld to %lld\n", + pr_info_ratelimited("%s: detected capacity change from %lld to %lld\n", disk->disk_name, capacity, size); /* @@ -122,37 +125,50 @@ static void part_stat_read_all(struct block_device *part, } } -static unsigned int part_in_flight(struct block_device *part) +static void bdev_count_inflight_rw(struct block_device *part, + unsigned int inflight[2], bool mq_driver) { - unsigned int inflight = 0; + int write = 0; + int read = 0; int cpu; + if (mq_driver) { + blk_mq_in_driver_rw(part, inflight); + return; + } + for_each_possible_cpu(cpu) { - inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) + - part_stat_local_read_cpu(part, in_flight[1], cpu); + read += part_stat_local_read_cpu(part, in_flight[READ], cpu); + write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu); } - if ((int)inflight < 0) - inflight = 0; - return inflight; + /* + * While iterating all CPUs, some IOs may be issued from a CPU already + * traversed and complete on a CPU that has not yet been traversed, + * causing the inflight number to be negative. + */ + inflight[READ] = read > 0 ? read : 0; + inflight[WRITE] = write > 0 ? write : 0; } -static void part_in_flight_rw(struct block_device *part, - unsigned int inflight[2]) +/** + * bdev_count_inflight - get the number of inflight IOs for a block device. + * + * @part: the block device. + * + * Inflight here means started IO accounting, from bdev_start_io_acct() for + * bio-based block device, and from blk_account_io_start() for rq-based block + * device. + */ +unsigned int bdev_count_inflight(struct block_device *part) { - int cpu; + unsigned int inflight[2] = {0}; - inflight[0] = 0; - inflight[1] = 0; - for_each_possible_cpu(cpu) { - inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu); - inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu); - } - if ((int)inflight[0] < 0) - inflight[0] = 0; - if ((int)inflight[1] < 0) - inflight[1] = 0; + bdev_count_inflight_rw(part, inflight, false); + + return inflight[READ] + inflight[WRITE]; } +EXPORT_SYMBOL_GPL(bdev_count_inflight); /* * Can be deleted altogether. Later. @@ -258,7 +274,7 @@ int __register_blkdev(unsigned int major, const char *name, #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD p->probe = probe; #endif - strlcpy(p->name, name, sizeof(p->name)); + strscpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); @@ -323,18 +339,6 @@ void blk_free_ext_minor(unsigned int minor) ida_free(&ext_devt_ida, minor); } -static char *bdevt_str(dev_t devt, char *buf) -{ - if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { - char tbuf[BDEVT_SIZE]; - snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); - snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); - } else - snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); - - return buf; -} - void disk_uevent(struct gendisk *disk, enum kobject_action action) { struct block_device *part; @@ -356,55 +360,96 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action) } EXPORT_SYMBOL_GPL(disk_uevent); -int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner) +int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) { - struct block_device *bdev; + struct file *file; + int ret = 0; - if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) - return -EINVAL; - if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state)) + if (!disk_has_partscan(disk)) return -EINVAL; if (disk->open_partitions) return -EBUSY; - /* Someone else has bdev exclusively open? */ - if (disk->part0->bd_holder && disk->part0->bd_holder != owner) - return -EBUSY; + + /* + * If the device is opened exclusively by current thread already, it's + * safe to scan partitons, otherwise, use bd_prepare_to_claim() to + * synchronize with other exclusive openers and other partition + * scanners. + */ + if (!(mode & BLK_OPEN_EXCL)) { + ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions, + NULL); + if (ret) + return ret; + } set_bit(GD_NEED_PART_SCAN, &disk->state); - bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - blkdev_put(bdev, mode); - return 0; + file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, + NULL, NULL); + if (IS_ERR(file)) + ret = PTR_ERR(file); + else + fput(file); + + /* + * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, + * and this will cause that re-assemble partitioned raid device will + * creat partition for underlying disk. + */ + clear_bit(GD_NEED_PART_SCAN, &disk->state); + if (!(mode & BLK_OPEN_EXCL)) + bd_abort_claiming(disk->part0, disk_scan_partitions); + return ret; } -/** - * device_add_disk - add disk information to kernel list - * @parent: parent device for the disk - * @disk: per-device partitioning information - * @groups: Additional per-device sysfs groups - * - * This function registers the partitioning information in @disk - * with the kernel. - */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups) +static void add_disk_final(struct gendisk *disk) +{ + struct device *ddev = disk_to_dev(disk); + + if (!(disk->flags & GENHD_FL_HIDDEN)) { + /* Make sure the first partition scan will be proceed */ + if (get_capacity(disk) && disk_has_partscan(disk)) + set_bit(GD_NEED_PART_SCAN, &disk->state); + + bdev_add(disk->part0, ddev->devt); + if (get_capacity(disk)) + disk_scan_partitions(disk, BLK_OPEN_READ); + + /* + * Announce the disk and partitions after all partitions are + * created. (for hidden disks uevents remain suppressed forever) + */ + dev_set_uevent_suppress(ddev, 0); + disk_uevent(disk, KOBJ_ADD); + } + + blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); + disk_add_events(disk); + set_bit(GD_ADDED, &disk->state); +} + +static int __add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) { struct device *ddev = disk_to_dev(disk); int ret; - /* Only makes sense for bio-based to set ->poll_bio */ - if (queue_is_mq(disk->queue) && disk->fops->poll_bio) + if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS)) return -EINVAL; - /* - * The disk queue should now be all set with enough information about - * the device for the elevator code to pick an adequate default - * elevator if one is needed, that is, for devices requesting queue - * registration. - */ - elevator_init_mq(disk->queue); + if (queue_is_mq(disk->queue)) { + /* + * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers. + */ + if (disk->fops->submit_bio || disk->fops->poll_bio) + return -EINVAL; + } else { + if (!disk->fops->submit_bio) + return -EINVAL; + bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO); + } /* * If the driver provides an explicit major number it also must provide @@ -416,22 +461,24 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, ret = -EINVAL; if (disk->major) { if (WARN_ON(!disk->minors)) - goto out_exit_elevator; + goto out; if (disk->minors > DISK_MAX_PARTS) { pr_err("block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); disk->minors = DISK_MAX_PARTS; } - if (disk->first_minor + disk->minors > MINORMASK + 1) - goto out_exit_elevator; + if (disk->first_minor > MINORMASK || + disk->minors > MINORMASK + 1 || + disk->first_minor + disk->minors > MINORMASK + 1) + goto out; } else { if (WARN_ON(disk->minors)) - goto out_exit_elevator; + goto out; ret = blk_alloc_ext_minor(); if (ret < 0) - goto out_exit_elevator; + goto out; disk->major = BLOCK_EXT_MAJOR; disk->first_minor = ret; } @@ -442,6 +489,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, ddev->parent = parent; ddev->groups = groups; dev_set_name(ddev, "%s", disk->disk_name); + if (fwnode) + device_set_node(ddev, fwnode); if (!(disk->flags & GENHD_FL_HIDDEN)) ddev->devt = MKDEV(disk->major, disk->first_minor); ret = device_add(ddev); @@ -452,12 +501,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, if (ret) goto out_device_del; - if (!sysfs_deprecated) { - ret = sysfs_create_link(block_depr, &ddev->kobj, - kobject_name(&ddev->kobj)); - if (ret) - goto out_device_del; - } + ret = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); + if (ret) + goto out_device_del; /* * avoid probable deadlock caused by allocating memory with @@ -466,15 +513,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - ret = blk_integrity_add(disk); - if (ret) - goto out_del_block_link; - disk->part0->bd_holder_dir = kobject_create_and_add("holders", &ddev->kobj); if (!disk->part0->bd_holder_dir) { ret = -ENOMEM; - goto out_del_integrity; + goto out_del_block_link; } disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (!disk->slave_dir) { @@ -496,17 +539,6 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, &disk->bdi->dev->kobj, "bdi"); if (ret) goto out_unregister_bdi; - - bdev_add(disk->part0, ddev->devt); - if (get_capacity(disk)) - disk_scan_partitions(disk, FMODE_READ, NULL); - - /* - * Announce the disk and partitions after all partitions are - * created. (for hidden disks uevents remain suppressed forever) - */ - dev_set_uevent_suppress(ddev, 0); - disk_uevent(disk, KOBJ_ADD); } else { /* * Even if the block_device for a hidden gendisk is not @@ -515,10 +547,6 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, */ disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor); } - - disk_update_readahead(disk); - disk_add_events(disk); - set_bit(GD_ADDED, &disk->state); return 0; out_unregister_bdi: @@ -532,93 +560,175 @@ out_put_slave_dir: disk->slave_dir = NULL; out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); -out_del_integrity: - blk_integrity_del(disk); out_del_block_link: - if (!sysfs_deprecated) - sysfs_remove_link(block_depr, dev_name(ddev)); + sysfs_remove_link(block_depr, dev_name(ddev)); + pm_runtime_set_memalloc_noio(ddev, false); out_device_del: device_del(ddev); out_free_ext_minor: if (disk->major == BLOCK_EXT_MAJOR) blk_free_ext_minor(disk->first_minor); -out_exit_elevator: - if (disk->queue->elevator) - elevator_exit(disk->queue); +out: return ret; } -EXPORT_SYMBOL(device_add_disk); /** - * blk_mark_disk_dead - mark a disk as dead - * @disk: disk to mark as dead + * add_disk_fwnode - add disk information to kernel list with fwnode + * @parent: parent device for the disk + * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups + * @fwnode: attached disk fwnode * - * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O - * to this disk. + * This function registers the partitioning information in @disk + * with the kernel. Also attach a fwnode to the disk device. */ -void blk_mark_disk_dead(struct gendisk *disk) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) { - set_bit(GD_DEAD, &disk->state); - blk_queue_start_drain(disk->queue); + struct blk_mq_tag_set *set; + unsigned int memflags; + int ret; + + if (queue_is_mq(disk->queue)) { + set = disk->queue->tag_set; + memflags = memalloc_noio_save(); + down_read(&set->update_nr_hwq_lock); + ret = __add_disk(parent, disk, groups, fwnode); + up_read(&set->update_nr_hwq_lock); + memalloc_noio_restore(memflags); + } else { + ret = __add_disk(parent, disk, groups, fwnode); + } /* - * Stop buffered writers from dirtying pages that can't be written out. + * add_disk_final() needn't to read `nr_hw_queues`, so move it out + * of read lock `set->update_nr_hwq_lock` for avoiding unnecessary + * lock dependency on `disk->open_mutex` from scanning partition. */ - set_capacity_and_notify(disk, 0); + if (!ret) + add_disk_final(disk); + return ret; } -EXPORT_SYMBOL_GPL(blk_mark_disk_dead); +EXPORT_SYMBOL_GPL(add_disk_fwnode); /** - * del_gendisk - remove the gendisk - * @disk: the struct gendisk to remove - * - * Removes the gendisk and all its associated resources. This deletes the - * partitions associated with the gendisk, and unregisters the associated - * request_queue. - * - * This is the counter to the respective __device_add_disk() call. - * - * The final removal of the struct gendisk happens when its refcount reaches 0 - * with put_disk(), which should be called after del_gendisk(), if - * __device_add_disk() was used. + * device_add_disk - add disk information to kernel list + * @parent: parent device for the disk + * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups * - * Drivers exist which depend on the release of the gendisk to be synchronous, - * it should not be deferred. + * This function registers the partitioning information in @disk + * with the kernel. + */ +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) +{ + return add_disk_fwnode(parent, disk, groups, NULL); +} +EXPORT_SYMBOL(device_add_disk); + +static void blk_report_disk_dead(struct gendisk *disk, bool surprise) +{ + struct block_device *bdev; + unsigned long idx; + + /* + * On surprise disk removal, bdev_mark_dead() may call into file + * systems below. Make it clear that we're expecting to not hold + * disk->open_mutex. + */ + lockdep_assert_not_held(&disk->open_mutex); + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, bdev) { + if (!kobject_get_unless_zero(&bdev->bd_device.kobj)) + continue; + rcu_read_unlock(); + + bdev_mark_dead(bdev, surprise); + + put_device(&bdev->bd_device); + rcu_read_lock(); + } + rcu_read_unlock(); +} + +static bool __blk_mark_disk_dead(struct gendisk *disk) +{ + /* + * Fail any new I/O. + */ + if (test_and_set_bit(GD_DEAD, &disk->state)) + return false; + + if (test_bit(GD_OWNS_QUEUE, &disk->state)) + blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue); + + /* + * Stop buffered writers from dirtying pages that can't be written out. + */ + set_capacity(disk, 0); + + /* + * Prevent new I/O from crossing bio_queue_enter(). + */ + return blk_queue_start_drain(disk->queue); +} + +/** + * blk_mark_disk_dead - mark a disk as dead + * @disk: disk to mark as dead * - * Context: can sleep + * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O + * to this disk. */ -void del_gendisk(struct gendisk *disk) +void blk_mark_disk_dead(struct gendisk *disk) +{ + __blk_mark_disk_dead(disk); + blk_report_disk_dead(disk, true); +} +EXPORT_SYMBOL_GPL(blk_mark_disk_dead); + +static void __del_gendisk(struct gendisk *disk) { struct request_queue *q = disk->queue; + struct block_device *part; + unsigned long idx; + bool start_drain; might_sleep(); if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) return; - blk_integrity_del(disk); disk_del_events(disk); + /* + * Prevent new openers by unlinked the bdev inode. + */ mutex_lock(&disk->open_mutex); - remove_inode_hash(disk->part0->bd_inode); - blk_drop_partitions(disk); + xa_for_each(&disk->part_tbl, idx, part) + bdev_unhash(part); mutex_unlock(&disk->open_mutex); - fsync_bdev(disk->part0); - __invalidate_device(disk->part0, true); - /* - * Fail any new I/O. + * Tell the file system to write back all dirty data and shut down if + * it hasn't been notified earlier. */ - set_bit(GD_DEAD, &disk->state); - if (test_bit(GD_OWNS_QUEUE, &disk->state)) - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - set_capacity(disk, 0); + if (!test_bit(GD_DEAD, &disk->state)) + blk_report_disk_dead(disk, false); /* - * Prevent new I/O from crossing bio_queue_enter(). + * Drop all partitions now that the disk is marked dead. */ - blk_queue_start_drain(q); + mutex_lock(&disk->open_mutex); + start_drain = __blk_mark_disk_dead(disk); + if (start_drain) + blk_freeze_acquire_lock(q); + xa_for_each_start(&disk->part_tbl, idx, part, 1) + drop_partition(part); + mutex_unlock(&disk->open_mutex); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -638,8 +748,7 @@ void del_gendisk(struct gendisk *disk) part_stat_set_all(disk->part0, 0); disk->part0->bd_stamp = 0; - if (!sysfs_deprecated) - sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); @@ -653,25 +762,67 @@ void del_gendisk(struct gendisk *disk) if (queue_is_mq(q)) blk_mq_cancel_work_sync(q); - blk_mq_quiesce_queue(q); - if (q->elevator) { - mutex_lock(&q->sysfs_lock); - elevator_exit(q); - mutex_unlock(&q->sysfs_lock); - } rq_qos_exit(q); - blk_mq_unquiesce_queue(q); /* * If the disk does not own the queue, allow using passthrough requests * again. Else leave the queue frozen to fail all I/O. */ - if (!test_bit(GD_OWNS_QUEUE, &disk->state)) { - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + if (!test_bit(GD_OWNS_QUEUE, &disk->state)) __blk_mq_unfreeze_queue(q, true); + else if (queue_is_mq(q)) + blk_mq_exit_queue(q); + + if (start_drain) + blk_unfreeze_release_lock(q); +} + +static void disable_elv_switch(struct request_queue *q) +{ + struct blk_mq_tag_set *set = q->tag_set; + WARN_ON_ONCE(!queue_is_mq(q)); + + down_write(&set->update_nr_hwq_lock); + blk_queue_flag_set(QUEUE_FLAG_NO_ELV_SWITCH, q); + up_write(&set->update_nr_hwq_lock); +} + +/** + * del_gendisk - remove the gendisk + * @disk: the struct gendisk to remove + * + * Removes the gendisk and all its associated resources. This deletes the + * partitions associated with the gendisk, and unregisters the associated + * request_queue. + * + * This is the counter to the respective device_add_disk() call. + * + * The final removal of the struct gendisk happens when its refcount reaches 0 + * with put_disk(), which should be called after del_gendisk(), if + * device_add_disk() was used. + * + * Drivers exist which depend on the release of the gendisk to be synchronous, + * it should not be deferred. + * + * Context: can sleep + */ +void del_gendisk(struct gendisk *disk) +{ + struct blk_mq_tag_set *set; + unsigned int memflags; + + if (!queue_is_mq(disk->queue)) { + __del_gendisk(disk); } else { - if (queue_is_mq(q)) - blk_mq_exit_queue(q); + set = disk->queue->tag_set; + + disable_elv_switch(disk->queue); + + memflags = memalloc_noio_save(); + down_read(&set->update_nr_hwq_lock); + __del_gendisk(disk); + up_read(&set->update_nr_hwq_lock); + memalloc_noio_restore(memflags); } } EXPORT_SYMBOL(del_gendisk); @@ -691,7 +842,7 @@ void invalidate_disk(struct gendisk *disk) struct block_device *bdev = disk->part0; invalidate_bdev(bdev); - bdev->bd_inode->i_mapping->wb_err = 0; + bdev->bd_mapping->wb_err = 0; set_capacity(disk, 0); } EXPORT_SYMBOL(invalidate_disk); @@ -704,7 +855,7 @@ static ssize_t disk_badblocks_show(struct device *dev, struct gendisk *disk = dev_to_disk(dev); if (!disk->bb) - return sprintf(page, "\n"); + return sysfs_emit(page, "\n"); return badblocks_show(disk->bb, page, 0); } @@ -722,7 +873,7 @@ static ssize_t disk_badblocks_store(struct device *dev, } #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD -void blk_request_module(dev_t devt) +static bool blk_probe_dev(dev_t devt) { unsigned int major = MAJOR(devt); struct blk_major_name **n; @@ -732,67 +883,28 @@ void blk_request_module(dev_t devt) if ((*n)->major == major && (*n)->probe) { (*n)->probe(devt); mutex_unlock(&major_names_lock); - return; + return true; } } mutex_unlock(&major_names_lock); - - if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) - /* Make old-style 2.4 aliases work */ - request_module("block-major-%d", MAJOR(devt)); + return false; } -#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */ -/* - * print a full list of all partitions - intended for places where the root - * filesystem can't be mounted and thus to give the victim some idea of what - * went wrong - */ -void __init printk_all_partitions(void) +void blk_request_module(dev_t devt) { - struct class_dev_iter iter; - struct device *dev; - - class_dev_iter_init(&iter, &block_class, NULL, &disk_type); - while ((dev = class_dev_iter_next(&iter))) { - struct gendisk *disk = dev_to_disk(dev); - struct block_device *part; - char devt_buf[BDEVT_SIZE]; - unsigned long idx; + int error; - /* - * Don't show empty devices or things that have been - * suppressed - */ - if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN)) - continue; + if (blk_probe_dev(devt)) + return; - /* - * Note, unlike /proc/partitions, I am showing the numbers in - * hex - the same format as the root= option takes. - */ - rcu_read_lock(); - xa_for_each(&disk->part_tbl, idx, part) { - if (!bdev_nr_sectors(part)) - continue; - printk("%s%s %10llu %pg %s", - bdev_is_partition(part) ? " " : "", - bdevt_str(part->bd_dev, devt_buf), - bdev_nr_sectors(part) >> 1, part, - part->bd_meta_info ? - part->bd_meta_info->uuid : ""); - if (bdev_is_partition(part)) - printk("\n"); - else if (dev->parent && dev->parent->driver) - printk(" driver: %s\n", - dev->parent->driver->name); - else - printk(" (driver?)\n"); - } - rcu_read_unlock(); - } - class_dev_iter_exit(&iter); + error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)); + /* Make old-style 2.4 aliases work */ + if (error > 0) + error = request_module("block-major-%d", MAJOR(devt)); + if (!error) + blk_probe_dev(devt); } +#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */ #ifdef CONFIG_PROC_FS /* iterator */ @@ -884,7 +996,6 @@ static int __init genhd_device_init(void) { int error; - block_class.dev_kobj = sysfs_dev_block_kobj; error = class_register(&block_class); if (unlikely(error)) return error; @@ -893,8 +1004,7 @@ static int __init genhd_device_init(void) register_blkdev(BLOCK_EXT_MAJOR, "blkext"); /* create top-level block dir */ - if (!sysfs_deprecated) - block_depr = kobject_create_and_add("block", NULL); + block_depr = kobject_create_and_add("block", NULL); return 0; } @@ -905,7 +1015,7 @@ static ssize_t disk_range_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", disk->minors); + return sysfs_emit(buf, "%d\n", disk->minors); } static ssize_t disk_ext_range_show(struct device *dev, @@ -913,7 +1023,7 @@ static ssize_t disk_ext_range_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", + return sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS); } @@ -922,7 +1032,7 @@ static ssize_t disk_removable_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", + return sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); } @@ -931,7 +1041,7 @@ static ssize_t disk_hidden_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", + return sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_HIDDEN ? 1 : 0)); } @@ -940,35 +1050,30 @@ static ssize_t disk_ro_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); + return sysfs_emit(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); + return sysfs_emit(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct block_device *bdev = dev_to_bdev(dev); - struct request_queue *q = bdev_get_queue(bdev); struct disk_stats stat; unsigned int inflight; - if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, bdev); - else - inflight = part_in_flight(bdev); - + inflight = bdev_count_inflight(bdev); if (inflight) { part_stat_lock(); update_io_ticks(bdev, jiffies, true); part_stat_unlock(); } part_stat_read_all(bdev, &stat); - return sprintf(buf, + return sysfs_emit(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " "%8u %8u %8u " @@ -998,27 +1103,28 @@ ssize_t part_stat_show(struct device *dev, (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); } +/* + * Show the number of IOs issued to driver. + * For bio-based device, started from bdev_start_io_acct(); + * For rq-based device, started from blk_mq_start_request(); + */ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); - unsigned int inflight[2]; + unsigned int inflight[2] = {0}; - if (queue_is_mq(q)) - blk_mq_in_flight_rw(q, bdev, inflight); - else - part_in_flight_rw(bdev, inflight); + bdev_count_inflight_rw(bdev, inflight, queue_is_mq(q)); - return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); + return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]); } static ssize_t disk_capability_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct gendisk *disk = dev_to_disk(dev); - - return sprintf(buf, "%x\n", disk->flags); + dev_warn_once(dev, "the capability attribute has been deprecated.\n"); + return sysfs_emit(buf, "0\n"); } static ssize_t disk_alignment_offset_show(struct device *dev, @@ -1027,7 +1133,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0)); + return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0)); } static ssize_t disk_discard_alignment_show(struct device *dev, @@ -1036,7 +1142,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0)); + return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0)); } static ssize_t diskseq_show(struct device *dev, @@ -1044,7 +1150,13 @@ static ssize_t diskseq_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%llu\n", disk->diskseq); + return sysfs_emit(buf, "%llu\n", disk->diskseq); +} + +static ssize_t partscan_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev))); } static DEVICE_ATTR(range, 0444, disk_range_show, NULL); @@ -1060,12 +1172,14 @@ static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL); +static DEVICE_ATTR(partscan, 0444, partscan_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail); + return sysfs_emit(buf, "%d\n", + bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL)); } ssize_t part_fail_store(struct device *dev, @@ -1074,9 +1188,12 @@ ssize_t part_fail_store(struct device *dev, { int i; - if (count > 0 && sscanf(buf, "%d", &i) > 0) - dev_to_bdev(dev)->bd_make_it_fail = i; - + if (count > 0 && sscanf(buf, "%d", &i) > 0) { + if (i) + bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL); + else + bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL); + } return count; } @@ -1106,6 +1223,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_events_async.attr, &dev_attr_events_poll_msecs.attr, &dev_attr_diskseq.attr, + &dev_attr_partscan.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif @@ -1135,6 +1253,9 @@ static const struct attribute_group *disk_attr_groups[] = { #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, #endif +#ifdef CONFIG_BLK_DEV_INTEGRITY + &blk_integrity_attr_group, +#endif NULL }; @@ -1144,7 +1265,7 @@ static const struct attribute_group *disk_attr_groups[] = { * * This function releases all allocated resources of the gendisk. * - * Drivers which used __device_add_disk() have a gendisk with a request_queue + * Drivers which used device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the * request_queue refcount to reach 0 at this point, and so the request_queue @@ -1159,6 +1280,8 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); + blk_trace_remove(disk->queue); + /* * To undo the all initialization from blk_mq_init_allocated_queue in * case of a probe failure where add_disk is never called we have to @@ -1177,16 +1300,17 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); - disk_free_zone_bitmaps(disk); + disk_free_zone_resources(disk); xa_destroy(&disk->part_tbl); + kobject_put(&disk->queue_kobj); disk->queue->disk = NULL; blk_put_queue(disk->queue); if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk) disk->fops->free_disk(disk); - iput(disk->part0->bd_inode); /* frees the disk */ + bdev_drop(disk->part0); /* frees the disk */ } static int block_uevent(const struct device *dev, struct kobj_uevent_env *env) @@ -1196,12 +1320,12 @@ static int block_uevent(const struct device *dev, struct kobj_uevent_env *env) return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq); } -struct class block_class = { +const struct class block_class = { .name = "block", .dev_uevent = block_uevent, }; -static char *block_devnode(struct device *dev, umode_t *mode, +static char *block_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid) { struct gendisk *disk = dev_to_disk(dev); @@ -1246,51 +1370,43 @@ static int diskstats_show(struct seq_file *seqf, void *v) xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue; - if (queue_is_mq(gp->queue)) - inflight = blk_mq_in_flight(gp->queue, hd); - else - inflight = part_in_flight(hd); + inflight = bdev_count_inflight(hd); if (inflight) { part_stat_lock(); update_io_ticks(hd, jiffies, true); part_stat_unlock(); } part_stat_read_all(hd, &stat); - seq_printf(seqf, "%4d %7d %pg " - "%lu %lu %lu %u " - "%lu %lu %lu %u " - "%u %u %u " - "%lu %lu %lu %u " - "%lu %u" - "\n", - MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd, - stat.ios[STAT_READ], - stat.merges[STAT_READ], - stat.sectors[STAT_READ], - (unsigned int)div_u64(stat.nsecs[STAT_READ], - NSEC_PER_MSEC), - stat.ios[STAT_WRITE], - stat.merges[STAT_WRITE], - stat.sectors[STAT_WRITE], - (unsigned int)div_u64(stat.nsecs[STAT_WRITE], - NSEC_PER_MSEC), - inflight, - jiffies_to_msecs(stat.io_ticks), - (unsigned int)div_u64(stat.nsecs[STAT_READ] + - stat.nsecs[STAT_WRITE] + - stat.nsecs[STAT_DISCARD] + - stat.nsecs[STAT_FLUSH], - NSEC_PER_MSEC), - stat.ios[STAT_DISCARD], - stat.merges[STAT_DISCARD], - stat.sectors[STAT_DISCARD], - (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], - NSEC_PER_MSEC), - stat.ios[STAT_FLUSH], - (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], - NSEC_PER_MSEC) - ); + seq_put_decimal_ull_width(seqf, "", MAJOR(hd->bd_dev), 4); + seq_put_decimal_ull_width(seqf, " ", MINOR(hd->bd_dev), 7); + seq_printf(seqf, " %pg", hd); + seq_put_decimal_ull(seqf, " ", stat.ios[STAT_READ]); + seq_put_decimal_ull(seqf, " ", stat.merges[STAT_READ]); + seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_READ]); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ], + NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", stat.ios[STAT_WRITE]); + seq_put_decimal_ull(seqf, " ", stat.merges[STAT_WRITE]); + seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_WRITE]); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_WRITE], + NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", inflight); + seq_put_decimal_ull(seqf, " ", jiffies_to_msecs(stat.io_ticks)); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ] + + stat.nsecs[STAT_WRITE] + + stat.nsecs[STAT_DISCARD] + + stat.nsecs[STAT_FLUSH], + NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", stat.ios[STAT_DISCARD]); + seq_put_decimal_ull(seqf, " ", stat.merges[STAT_DISCARD]); + seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_DISCARD]); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], + NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], + NSEC_PER_MSEC)); + seq_putc(seqf, '\n'); } rcu_read_unlock(); @@ -1327,35 +1443,6 @@ dev_t part_devt(struct gendisk *disk, u8 partno) return devt; } -dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - struct class_dev_iter iter; - struct device *dev; - - class_dev_iter_init(&iter, &block_class, NULL, &disk_type); - while ((dev = class_dev_iter_next(&iter))) { - struct gendisk *disk = dev_to_disk(dev); - - if (strcmp(dev_name(dev), name)) - continue; - - if (partno < disk->minors) { - /* We need to return the right devno, even - * if the partition doesn't exist yet. - */ - devt = MKDEV(MAJOR(dev->devt), - MINOR(dev->devt) + partno); - } else { - devt = part_devt(disk, partno); - if (devt) - break; - } - } - class_dev_iter_exit(&iter); - return devt; -} - struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass) { @@ -1388,6 +1475,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, if (blkcg_init_disk(disk)) goto out_erase_part0; + disk_init_zone_resources(disk); rand_initialize_disk(disk); disk_to_dev(disk)->class = &block_class; disk_to_dev(disk)->type = &disk_type; @@ -1398,6 +1486,8 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED INIT_LIST_HEAD(&disk->slave_bdevs); #endif + mutex_init(&disk->rqos_state_mutex); + kobject_init(&disk->queue_kobj, &blk_queue_ktype); return disk; out_erase_part0: @@ -1405,7 +1495,7 @@ out_erase_part0: out_destroy_part_tbl: xa_destroy(&disk->part_tbl); disk->part0->bd_disk = NULL; - iput(disk->part0->bd_inode); + bdev_drop(disk->part0); out_free_bdi: bdi_put(disk->bdi); out_free_bioset: @@ -1415,19 +1505,21 @@ out_free_disk: return NULL; } -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass) { + struct queue_limits default_lim = { }; struct request_queue *q; struct gendisk *disk; - q = blk_alloc_queue(node); - if (!q) - return NULL; + q = blk_alloc_queue(lim ? lim : &default_lim, node); + if (IS_ERR(q)) + return ERR_CAST(q); disk = __alloc_disk_node(q, node, lkclass); if (!disk) { blk_put_queue(q); - return NULL; + return ERR_PTR(-ENOMEM); } set_bit(GD_OWNS_QUEUE, &disk->state); return disk; |
