summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2018-12-19 09:13:34 -0700
committerJens Axboe <axboe@kernel.dk>2018-12-19 09:13:34 -0700
commitdbe3ece1287dafe4113c64ada3113c39f344c64a (patch)
tree380a25d45315b9d1d6fbf2695f09010b34013975
parent499aeb45b2e225e423ef333ec68bd9b33a42c8be (diff)
dm: don't reuse bio for flushes
DM currently has a statically allocated bio that it uses to issue empty flushes. It doesn't submit this bio, it just uses it for maintaining state while setting up clones. Multiple users can access this bio at the same time. This wasn't previously an issue, even if it was a bit iffy, but with the blkg associations it can become one. We setup the blkg association, then clone bio's and submit, then remove the blkg assocation again. But since we can have multiple tasks doing this at the same time, against multiple blkg's, then we can either lose references to a blkg, or put it twice. The latter causes complaints on the percpu ref being <= 0 when released, and can cause use-after-free as well. Ming reports that xfstest generic/475 triggers this: ------------[ cut here ]------------ percpu ref (blkg_release) <= 0 (0) after switching to atomic WARNING: CPU: 13 PID: 0 at lib/percpu-refcount.c:155 percpu_ref_switch_to_atomic_rcu+0x2c9/0x4a0 Switch to just using an on-stack bio for this, and get rid of the embedded bio. Fixes: 5cdf2e3fea5e ("blkcg: associate blkg when associating a device") Reported-by: Ming Lei <ming.lei@redhat.com> Tested-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--drivers/md/dm-core.h3
-rw-r--r--drivers/md/dm.c35
2 files changed, 25 insertions, 13 deletions
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 6fe883fac471..95c6d86ab5e8 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -106,9 +106,6 @@ struct mapped_device {
struct block_device *bdev;
- /* zero-length flush that will be cloned and submitted to targets */
- struct bio flush_bio;
-
struct dm_stats stats;
/* for blk-mq request-based DM support */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dddbca63e140..f588a6a83d80 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1420,11 +1420,11 @@ static int __send_empty_flush(struct clone_info *ci)
struct dm_target *ti;
/*
- * Empty flush uses a statically initialized bio, &md->flush_bio, as
- * the base for cloning. However, blkg association requires that a
- * bdev is associated with a gendisk, which doesn't happen until the
- * bdev is opened. So, blkg association is done at issue time of the
- * flush rather than when the device is created in alloc_dev().
+ * Empty flush uses a statically initialized bio, as the base for
+ * cloning. However, blkg association requires that a bdev is
+ * associated with a gendisk, which doesn't happen until the bdev is
+ * opened. So, blkg association is done at issue time of the flush
+ * rather than when the device is created in alloc_dev().
*/
bio_set_dev(ci->bio, ci->io->md->bdev);
@@ -1609,7 +1609,16 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
- ci.bio = &ci.io->md->flush_bio;
+ struct bio flush_bio;
+
+ /*
+ * Use an on-stack bio for this, it's safe since we don't
+ * need to reference it after submit. It's just used as
+ * the basis for the clone(s).
+ */
+ bio_init(&flush_bio, NULL, 0);
+ flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
@@ -1665,7 +1674,16 @@ static blk_qc_t __process_bio(struct mapped_device *md,
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
- ci.bio = &ci.io->md->flush_bio;
+ struct bio flush_bio;
+
+ /*
+ * Use an on-stack bio for this, it's safe since we don't
+ * need to reference it after submit. It's just used as
+ * the basis for the clone(s).
+ */
+ bio_init(&flush_bio, NULL, 0);
+ flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
@@ -1949,9 +1967,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->bdev)
goto bad;
- bio_init(&md->flush_bio, NULL, 0);
- md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
-
dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */