summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2020-11-26 18:43:37 +0100
committerJens Axboe <axboe@kernel.dk>2020-12-01 14:53:40 -0700
commita782483cc1f875355690625d8253a232f2581418 (patch)
tree492d3bf67cdd2535afd826dde93aa35dcfb42180 /block
parente6cb53827ed60019bbbc5cf189dd204b3b0e8121 (diff)
block: remove the nr_sects field in struct hd_struct
Now that the hd_struct always has a block device attached to it, there is no need for having two size field that just get out of sync. Additionally the field in hd_struct did not use proper serialization, possibly allowing for torn writes. By only using the block_device field this problem also gets fixed. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Jan Kara <jack@suse.cz> Reviewed-by: Hannes Reinecke <hare@suse.de> Acked-by: Coly Li <colyli@suse.de> [bcache] Acked-by: Chao Yu <yuchao0@huawei.com> [f2fs] Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/bio.c4
-rw-r--r--block/blk-core.c2
-rw-r--r--block/blk.h53
-rw-r--r--block/genhd.c59
-rw-r--r--block/partitions/core.c17
5 files changed, 49 insertions, 86 deletions
diff --git a/block/bio.c b/block/bio.c
index fa01bef35bb1..669bb47a3198 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -613,8 +613,8 @@ void guard_bio_eod(struct bio *bio)
rcu_read_lock();
part = __disk_get_part(bio->bi_disk, bio->bi_partno);
if (part)
- maxsector = part_nr_sects_read(part);
- else
+ maxsector = bdev_nr_sectors(part->bdev);
+ else
maxsector = get_capacity(bio->bi_disk);
rcu_read_unlock();
diff --git a/block/blk-core.c b/block/blk-core.c
index 2db8bda43b6e..988f45094a38 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -755,7 +755,7 @@ static inline int blk_partition_remap(struct bio *bio)
goto out;
if (bio_sectors(bio)) {
- if (bio_check_eod(bio, part_nr_sects_read(p)))
+ if (bio_check_eod(bio, bdev_nr_sectors(p->bdev)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
diff --git a/block/blk.h b/block/blk.h
index c4839abcfa27..09cee7024fb4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -387,59 +387,6 @@ static inline void hd_free_part(struct hd_struct *part)
percpu_ref_exit(&part->ref);
}
-/*
- * Any access of part->nr_sects which is not protected by partition
- * bd_mutex or gendisk bdev bd_mutex, should be done using this
- * accessor function.
- *
- * Code written along the lines of i_size_read() and i_size_write().
- * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
- * on.
- */
-static inline sector_t part_nr_sects_read(struct hd_struct *part)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- sector_t nr_sects;
- unsigned seq;
- do {
- seq = read_seqcount_begin(&part->nr_sects_seq);
- nr_sects = part->nr_sects;
- } while (read_seqcount_retry(&part->nr_sects_seq, seq));
- return nr_sects;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- sector_t nr_sects;
-
- preempt_disable();
- nr_sects = part->nr_sects;
- preempt_enable();
- return nr_sects;
-#else
- return part->nr_sects;
-#endif
-}
-
-/*
- * Should be called with mutex lock held (typically bd_mutex) of partition
- * to provide mutual exlusion among writers otherwise seqcount might be
- * left in wrong state leaving the readers spinning infinitely.
- */
-static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- preempt_disable();
- write_seqcount_begin(&part->nr_sects_seq);
- part->nr_sects = size;
- write_seqcount_end(&part->nr_sects_seq);
- preempt_enable();
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
- preempt_disable();
- part->nr_sects = size;
- preempt_enable();
-#else
- part->nr_sects = size;
-#endif
-}
-
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
diff --git a/block/genhd.c b/block/genhd.c
index bf8fa82f135f..c65f485b9db5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -40,6 +40,16 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
+void set_capacity(struct gendisk *disk, sector_t sectors)
+{
+ struct block_device *bdev = disk->part0.bdev;
+
+ spin_lock(&bdev->bd_size_lock);
+ i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+ spin_unlock(&bdev->bd_size_lock);
+}
+EXPORT_SYMBOL(set_capacity);
+
/*
* Set disk capacity and notify if the size is not currently zero and will not
* be set to zero. Returns true if a uevent was sent, otherwise false.
@@ -47,18 +57,30 @@ static void disk_release_events(struct gendisk *disk);
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
sector_t capacity = get_capacity(disk);
+ char *envp[] = { "RESIZE=1", NULL };
set_capacity(disk, size);
- revalidate_disk_size(disk, true);
- if (capacity != size && capacity != 0 && size != 0) {
- char *envp[] = { "RESIZE=1", NULL };
+ /*
+ * Only print a message and send a uevent if the gendisk is user visible
+ * and alive. This avoids spamming the log and udev when setting the
+ * initial capacity during probing.
+ */
+ if (size == capacity ||
+ (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
+ return false;
- kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
- return true;
- }
+ pr_info("%s: detected capacity change from %lld to %lld\n",
+ disk->disk_name, size, capacity);
- return false;
+ /*
+ * Historically we did not send a uevent for changes to/from an empty
+ * device.
+ */
+ if (!capacity || !size)
+ return false;
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
@@ -247,7 +269,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part_nr_sects_read(part) &&
+ if (!bdev_nr_sectors(part->bdev) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
@@ -284,7 +306,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part_nr_sects_read(part);
+ sector < part->start_sect + bdev_nr_sectors(part->bdev);
}
/**
@@ -986,8 +1008,8 @@ void __init printk_all_partitions(void)
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part_nr_sects_read(part) >> 1
- , disk_name(disk, part->partno, name_buf),
+ bdev_nr_sectors(part->bdev) >> 1,
+ disk_name(disk, part->partno, name_buf),
part->info ? part->info->uuid : "");
if (is_part0) {
if (dev->parent && dev->parent->driver)
@@ -1079,7 +1101,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part_nr_sects_read(part) >> 1,
+ bdev_nr_sectors(part->bdev) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);
@@ -1161,8 +1183,7 @@ ssize_t part_size_show(struct device *dev,
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",
- (unsigned long long)part_nr_sects_read(p));
+ return sprintf(buf, "%llu\n", bdev_nr_sectors(p->bdev));
}
ssize_t part_stat_show(struct device *dev,
@@ -1618,16 +1639,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[0], &disk->part0);
- /*
- * set_capacity() and get_capacity() currently don't use
- * seqcounter to read/update the part0->nr_sects. Still init
- * the counter as we can read the sectors in IO submission
- * patch using seqence counters.
- *
- * TODO: Ideally set_capacity() and get_capacity() should be
- * converted to make use of bd_mutex and sequence counters.
- */
- hd_sects_seq_init(&disk->part0);
if (hd_ref_init(&disk->part0))
goto out_free_bdstats;
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 696bd9ff63c6..bcfa8215bd5e 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = {
NULL
};
+static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
+{
+ spin_lock(&bdev->bd_size_lock);
+ i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+ spin_unlock(&bdev->bd_size_lock);
+}
+
static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
struct parsed_partitions *state;
@@ -295,7 +302,7 @@ static void hd_struct_free_work(struct work_struct *work)
put_device(disk_to_dev(disk));
part->start_sect = 0;
- part->nr_sects = 0;
+ bdev_set_nr_sectors(part->bdev, 0);
part_stat_set_all(part, 0);
put_device(part_to_dev(part));
}
@@ -412,11 +419,10 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
goto out_free_stats;
p->bdev = bdev;
- hd_sects_seq_init(p);
pdev = part_to_dev(p);
p->start_sect = start;
- p->nr_sects = len;
+ bdev_set_nr_sectors(bdev, len);
p->partno = partno;
p->policy = get_disk_ro(disk);
@@ -509,7 +515,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter))) {
if (part->partno == skip_partno ||
- start >= part->start_sect + part->nr_sects ||
+ start >= part->start_sect + bdev_nr_sectors(part->bdev) ||
start + length <= part->start_sect)
continue;
overlap = true;
@@ -600,8 +606,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
if (partition_overlaps(bdev->bd_disk, start, length, partno))
goto out_unlock;
- part_nr_sects_write(part, length);
- bd_set_nr_sectors(bdevp, length);
+ bdev_set_nr_sectors(bdevp, length);
ret = 0;
out_unlock: