diff options
Diffstat (limited to 'drivers/md/dm-log-writes.c')
| -rw-r--r-- | drivers/md/dm-log-writes.c | 304 |
1 files changed, 220 insertions, 84 deletions
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index a1da0eb58a93..7bb7174f8f4f 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2014 Facebook. All rights reserved. * @@ -10,9 +11,11 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/dax.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/uio.h> #define DM_MSG_PREFIX "log-writes" @@ -38,7 +41,7 @@ * * Would result in the log looking like this: * - * c,a,flush,fuad,b,<other writes>,<next flush> + * c,a,b,flush,fuad,<other writes>,<next flush> * * This is meant to help expose problems where file systems do not properly wait * on data being written before invoking a FLUSH. FUA bypasses cache so once it @@ -50,13 +53,15 @@ * in fact we want to do the data and the discard in the order that they * completed. */ -#define LOG_FLUSH_FLAG (1 << 0) -#define LOG_FUA_FLAG (1 << 1) -#define LOG_DISCARD_FLAG (1 << 2) -#define LOG_MARK_FLAG (1 << 3) +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) +#define LOG_METADATA_FLAG (1 << 4) #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL +#define WRITE_LOG_SUPER_SECTOR 0 /* * The disk format for this is braindead simple. @@ -100,6 +105,7 @@ struct log_writes_c { struct dm_dev *logdev; u64 logged_entries; u32 sectorsize; + u32 sectorshift; atomic_t io_blocks; atomic_t pending_blocks; sector_t next_sector; @@ -111,6 +117,7 @@ struct log_writes_c { struct list_head logging_blocks; wait_queue_head_t wait; struct task_struct *log_kthread; + struct completion super_done; }; struct pending_block { @@ -121,13 +128,25 @@ struct pending_block { char *data; u32 datalen; struct list_head list; - struct bio_vec vecs[0]; + struct bio_vec vecs[]; }; struct per_bio_data { struct pending_block *block; }; +static inline sector_t bio_to_dev_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors >> (lc->sectorshift - SECTOR_SHIFT); +} + +static inline sector_t dev_to_bio_sectors(struct log_writes_c *lc, + sector_t sectors) +{ + return sectors << (lc->sectorshift - SECTOR_SHIFT); +} + static void put_pending_block(struct log_writes_c *lc) { if (atomic_dec_and_test(&lc->pending_blocks)) { @@ -164,6 +183,14 @@ static void log_end_io(struct bio *bio) bio_put(bio); } +static void log_end_super(struct bio *bio) +{ + struct log_writes_c *lc = bio->bi_private; + + complete(&lc->super_done); + log_end_io(bio); +} + /* * Meant to be called if there is an error, it will free all the pages * associated with the block. @@ -191,17 +218,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry, void *ptr; size_t ret; - bio = bio_alloc(GFP_KERNEL, 1); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } + bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; - bio->bi_end_io = log_end_io; + bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ? + log_end_super : log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); page = alloc_page(GFP_KERNEL); if (!page) { @@ -210,13 +232,13 @@ static int write_metadata(struct log_writes_c *lc, void *entry, goto error; } - ptr = kmap_atomic(page); + ptr = kmap_local_page(page); memcpy(ptr, entry, entrylen); if (datalen) memcpy(ptr + entrylen, data, datalen); memset(ptr + entrylen + datalen, 0, lc->sectorsize - entrylen - datalen); - kunmap_atomic(ptr); + kunmap_local(ptr); ret = bio_add_page(bio, page, lc->sectorsize, 0); if (ret != lc->sectorsize) { @@ -233,40 +255,108 @@ error: return -1; } +static int write_inline_data(struct log_writes_c *lc, void *entry, + size_t entrylen, void *data, size_t datalen, + sector_t sector) +{ + int bio_pages, pg_datalen, pg_sectorlen, i; + struct page *page; + struct bio *bio; + size_t ret; + void *ptr; + + while (datalen) { + bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE)); + + atomic_inc(&lc->io_blocks); + + bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE, + GFP_KERNEL); + bio->bi_iter.bi_size = 0; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = log_end_io; + bio->bi_private = lc; + + for (i = 0; i < bio_pages; i++) { + pg_datalen = min_t(int, datalen, PAGE_SIZE); + pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize); + + page = alloc_page(GFP_KERNEL); + if (!page) { + DMERR("Couldn't alloc inline data page"); + goto error_bio; + } + + ptr = kmap_local_page(page); + memcpy(ptr, data, pg_datalen); + if (pg_sectorlen > pg_datalen) + memset(ptr + pg_datalen, 0, pg_sectorlen - pg_datalen); + kunmap_local(ptr); + + ret = bio_add_page(bio, page, pg_sectorlen, 0); + if (ret != pg_sectorlen) { + DMERR("Couldn't add page of inline data"); + __free_page(page); + goto error_bio; + } + + datalen -= pg_datalen; + data += pg_datalen; + } + submit_bio(bio); + + sector += bio_pages * PAGE_SECTORS; + } + return 0; +error_bio: + bio_free_pages(bio); + bio_put(bio); + put_io_block(lc); + return -1; +} + static int log_one_block(struct log_writes_c *lc, struct pending_block *block, sector_t sector) { struct bio *bio; struct log_write_entry entry; - size_t ret; + size_t metadatalen, ret; int i; entry.sector = cpu_to_le64(block->sector); entry.nr_sectors = cpu_to_le64(block->nr_sectors); entry.flags = cpu_to_le64(block->flags); entry.data_len = cpu_to_le64(block->datalen); + + metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0; if (write_metadata(lc, &entry, sizeof(entry), block->data, - block->datalen, sector)) { + metadatalen, sector)) { free_pending_block(lc, block); return -1; } + sector += dev_to_bio_sectors(lc, 1); + + if (block->datalen && metadatalen == 0) { + if (write_inline_data(lc, &entry, sizeof(entry), block->data, + block->datalen, sector)) { + free_pending_block(lc, block); + return -1; + } + /* we don't support both inline data & bio data */ + goto out; + } + if (!block->vec_cnt) goto out; - sector++; atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } + bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt), + REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); for (i = 0; i < block->vec_cnt; i++) { /* @@ -278,17 +368,13 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); - if (!bio) { - DMERR("Couldn't alloc log bio"); - goto error; - } + bio = bio_alloc(lc->logdev->bdev, + bio_max_segs(block->vec_cnt - i), + REQ_OP_WRITE, GFP_KERNEL); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); ret = bio_add_page(bio, block->vecs[i].bv_page, block->vecs[i].bv_len, 0); @@ -321,22 +407,29 @@ static int log_super(struct log_writes_c *lc) super.nr_entries = cpu_to_le64(lc->logged_entries); super.sectorsize = cpu_to_le32(lc->sectorsize); - if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { + if (write_metadata(lc, &super, sizeof(super), NULL, 0, + WRITE_LOG_SUPER_SECTOR)) { DMERR("Couldn't write super"); return -1; } + /* + * Super sector should be written in-order, otherwise the + * nr_entries could be rewritten incorrectly by an old bio. + */ + wait_for_completion_io(&lc->super_done); + return 0; } static inline sector_t logdev_last_sector(struct log_writes_c *lc) { - return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(lc->logdev->bdev); } static int log_writes_kthread(void *arg) { - struct log_writes_c *lc = (struct log_writes_c *)arg; + struct log_writes_c *lc = arg; sector_t sector = 0; while (!kthread_should_stop()) { @@ -354,10 +447,9 @@ static int log_writes_kthread(void *arg) goto next; sector = lc->next_sector; - if (block->flags & LOG_DISCARD_FLAG) - lc->next_sector++; - else - lc->next_sector += block->nr_sectors + 1; + if (!(block->flags & LOG_DISCARD_FLAG)) + lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors); + lc->next_sector += dev_to_bio_sectors(lc, 1); /* * Apparently the size of the device may not be known @@ -399,7 +491,7 @@ next: if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && - !atomic_read(&lc->pending_blocks)) + list_empty(&lc->logging_blocks)) schedule(); __set_current_state(TASK_RUNNING); } @@ -435,7 +527,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); - lc->sectorsize = 1 << SECTOR_SHIFT; + init_completion(&lc->super_done); atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 0); @@ -455,6 +547,8 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + lc->sectorsize = bdev_logical_block_size(lc->dev->bdev); + lc->sectorshift = ilog2(lc->sectorsize); lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); if (IS_ERR(lc->log_kthread)) { ret = PTR_ERR(lc->log_kthread); @@ -464,8 +558,12 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - /* We put the super at sector 0, start logging at sector 1 */ - lc->next_sector = 1; + /* + * next_sector is in 512b sectors to correspond to what bi_sector expects. + * The super starts at sector 0, and the next_sector is the next logical + * one based on the sectorsize of the device. + */ + lc->next_sector = lc->sectorsize >> SECTOR_SHIFT; lc->logging_enabled = true; lc->end_sector = logdev_last_sector(lc); lc->device_supports_discard = true; @@ -494,7 +592,7 @@ static int log_mark(struct log_writes_c *lc, char *data) return -ENOMEM; } - block->data = kstrndup(data, maxsize, GFP_KERNEL); + block->data = kstrndup(data, maxsize - 1, GFP_KERNEL); if (!block->data) { DMERR("Error copying mark data"); kfree(block); @@ -539,7 +637,7 @@ static void normal_map_bio(struct dm_target *ti, struct bio *bio) { struct log_writes_c *lc = ti->private; - bio->bi_bdev = lc->dev->bdev; + bio_set_dev(bio, lc->dev->bdev); } static int log_writes_map(struct dm_target *ti, struct bio *bio) @@ -554,6 +652,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) bool flush_bio = (bio->bi_opf & REQ_PREFLUSH); bool fua_bio = (bio->bi_opf & REQ_FUA); bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD); + bool meta_bio = (bio->bi_opf & REQ_META); pb->block = NULL; @@ -578,7 +677,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) if (discard_bio) alloc_size = sizeof(struct pending_block); else - alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio); + alloc_size = struct_size(block, vecs, bio_segments(bio)); block = kzalloc(alloc_size, GFP_NOIO); if (!block) { @@ -598,9 +697,11 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) block->flags |= LOG_FUA_FLAG; if (discard_bio) block->flags |= LOG_DISCARD_FLAG; + if (meta_bio) + block->flags |= LOG_METADATA_FLAG; - block->sector = bio->bi_iter.bi_sector; - block->nr_sectors = bio_sectors(bio); + block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector); + block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio)); /* We don't need the data, just submit */ if (discard_bio) { @@ -630,7 +731,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) */ bio_for_each_segment(bv, bio, iter) { struct page *page; - void *src, *dst; + void *dst; page = alloc_page(GFP_NOIO); if (!page) { @@ -642,11 +743,9 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_KILL; } - src = kmap_atomic(bv.bv_page); - dst = kmap_atomic(page); - memcpy(dst, src + bv.bv_offset, bv.bv_len); - kunmap_atomic(dst); - kunmap_atomic(src); + dst = kmap_local_page(page); + memcpy_from_bvec(dst, &bv); + kunmap_local(dst); block->vecs[i].bv_page = page; block->vecs[i].bv_len = bv.bv_len; block->vec_cnt++; @@ -694,10 +793,10 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, * INFO format: <logged entries> <highest allocated sector> */ static void log_writes_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, - unsigned maxlen) + unsigned int status_flags, char *result, + unsigned int maxlen) { - unsigned sz = 0; + unsigned int sz = 0; struct log_writes_c *lc = ti->private; switch (type) { @@ -711,11 +810,17 @@ static void log_writes_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s %s", lc->dev->name, lc->logdev->name); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } static int log_writes_prepare_ioctl(struct dm_target *ti, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev, + unsigned int cmd, unsigned long arg, + bool *forward) { struct log_writes_c *lc = ti->private; struct dm_dev *dev = lc->dev; @@ -724,7 +829,7 @@ static int log_writes_prepare_ioctl(struct dm_target *ti, /* * Only pass ioctls through if the device sizes match exactly. */ - if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + if (ti->len != bdev_nr_sectors(dev->bdev)) return 1; return 0; } @@ -742,7 +847,8 @@ static int log_writes_iterate_devices(struct dm_target *ti, * Messages supported: * mark <mark data> - specify the marked data. */ -static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv) +static int log_writes_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen) { int r = -EINVAL; struct log_writes_c *lc = ti->private; @@ -763,18 +869,62 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv) static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct log_writes_c *lc = ti->private; - struct request_queue *q = bdev_get_queue(lc->dev->bdev); - if (!q || !blk_queue_discard(q)) { + if (!bdev_max_discard_sectors(lc->dev->bdev)) { lc->device_supports_discard = false; - limits->discard_granularity = 1 << SECTOR_SHIFT; - limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); + limits->discard_granularity = lc->sectorsize; + limits->max_hw_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); } + limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); + limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); + limits->io_min = limits->physical_block_size; + limits->dma_alignment = limits->logical_block_size - 1; } +#if IS_ENABLED(CONFIG_FS_DAX) +static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, + pgoff_t *pgoff) +{ + struct log_writes_c *lc = ti->private; + + *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT); + return lc->dev->dax_dev; +} + +static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, + long nr_pages, enum dax_access_mode mode, void **kaddr, + unsigned long *pfn) +{ + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); + + return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn); +} + +static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, + size_t nr_pages) +{ + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); + + return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT); +} + +static size_t log_writes_dax_recovery_write(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); + + return dax_recovery_write(dax_dev, pgoff, addr, bytes, i); +} + +#else +#define log_writes_dax_direct_access NULL +#define log_writes_dax_zero_page_range NULL +#define log_writes_dax_recovery_write NULL +#endif + static struct target_type log_writes_target = { .name = "log-writes", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = log_writes_ctr, .dtr = log_writes_dtr, @@ -785,25 +935,11 @@ static struct target_type log_writes_target = { .message = log_writes_message, .iterate_devices = log_writes_iterate_devices, .io_hints = log_writes_io_hints, + .direct_access = log_writes_dax_direct_access, + .dax_zero_page_range = log_writes_dax_zero_page_range, + .dax_recovery_write = log_writes_dax_recovery_write, }; - -static int __init dm_log_writes_init(void) -{ - int r = dm_register_target(&log_writes_target); - - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -static void __exit dm_log_writes_exit(void) -{ - dm_unregister_target(&log_writes_target); -} - -module_init(dm_log_writes_init); -module_exit(dm_log_writes_exit); +module_dm(log_writes); MODULE_DESCRIPTION(DM_NAME " log writes target"); MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>"); |
