summaryrefslogtreecommitdiff
path: root/drivers/md/dm-log-writes.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-log-writes.c')
-rw-r--r--drivers/md/dm-log-writes.c304
1 files changed, 220 insertions, 84 deletions
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index a1da0eb58a93..7bb7174f8f4f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2014 Facebook. All rights reserved.
*
@@ -10,9 +11,11 @@
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/uio.h>
#define DM_MSG_PREFIX "log-writes"
@@ -38,7 +41,7 @@
*
* Would result in the log looking like this:
*
- * c,a,flush,fuad,b,<other writes>,<next flush>
+ * c,a,b,flush,fuad,<other writes>,<next flush>
*
* This is meant to help expose problems where file systems do not properly wait
* on data being written before invoking a FLUSH. FUA bypasses cache so once it
@@ -50,13 +53,15 @@
* in fact we want to do the data and the discard in the order that they
* completed.
*/
-#define LOG_FLUSH_FLAG (1 << 0)
-#define LOG_FUA_FLAG (1 << 1)
-#define LOG_DISCARD_FLAG (1 << 2)
-#define LOG_MARK_FLAG (1 << 3)
+#define LOG_FLUSH_FLAG (1 << 0)
+#define LOG_FUA_FLAG (1 << 1)
+#define LOG_DISCARD_FLAG (1 << 2)
+#define LOG_MARK_FLAG (1 << 3)
+#define LOG_METADATA_FLAG (1 << 4)
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
+#define WRITE_LOG_SUPER_SECTOR 0
/*
* The disk format for this is braindead simple.
@@ -100,6 +105,7 @@ struct log_writes_c {
struct dm_dev *logdev;
u64 logged_entries;
u32 sectorsize;
+ u32 sectorshift;
atomic_t io_blocks;
atomic_t pending_blocks;
sector_t next_sector;
@@ -111,6 +117,7 @@ struct log_writes_c {
struct list_head logging_blocks;
wait_queue_head_t wait;
struct task_struct *log_kthread;
+ struct completion super_done;
};
struct pending_block {
@@ -121,13 +128,25 @@ struct pending_block {
char *data;
u32 datalen;
struct list_head list;
- struct bio_vec vecs[0];
+ struct bio_vec vecs[];
};
struct per_bio_data {
struct pending_block *block;
};
+static inline sector_t bio_to_dev_sectors(struct log_writes_c *lc,
+ sector_t sectors)
+{
+ return sectors >> (lc->sectorshift - SECTOR_SHIFT);
+}
+
+static inline sector_t dev_to_bio_sectors(struct log_writes_c *lc,
+ sector_t sectors)
+{
+ return sectors << (lc->sectorshift - SECTOR_SHIFT);
+}
+
static void put_pending_block(struct log_writes_c *lc)
{
if (atomic_dec_and_test(&lc->pending_blocks)) {
@@ -164,6 +183,14 @@ static void log_end_io(struct bio *bio)
bio_put(bio);
}
+static void log_end_super(struct bio *bio)
+{
+ struct log_writes_c *lc = bio->bi_private;
+
+ complete(&lc->super_done);
+ log_end_io(bio);
+}
+
/*
* Meant to be called if there is an error, it will free all the pages
* associated with the block.
@@ -191,17 +218,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
void *ptr;
size_t ret;
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
- bio->bi_end_io = log_end_io;
+ bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
+ log_end_super : log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -210,13 +232,13 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
goto error;
}
- ptr = kmap_atomic(page);
+ ptr = kmap_local_page(page);
memcpy(ptr, entry, entrylen);
if (datalen)
memcpy(ptr + entrylen, data, datalen);
memset(ptr + entrylen + datalen, 0,
lc->sectorsize - entrylen - datalen);
- kunmap_atomic(ptr);
+ kunmap_local(ptr);
ret = bio_add_page(bio, page, lc->sectorsize, 0);
if (ret != lc->sectorsize) {
@@ -233,40 +255,108 @@ error:
return -1;
}
+static int write_inline_data(struct log_writes_c *lc, void *entry,
+ size_t entrylen, void *data, size_t datalen,
+ sector_t sector)
+{
+ int bio_pages, pg_datalen, pg_sectorlen, i;
+ struct page *page;
+ struct bio *bio;
+ size_t ret;
+ void *ptr;
+
+ while (datalen) {
+ bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE));
+
+ atomic_inc(&lc->io_blocks);
+
+ bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE,
+ GFP_KERNEL);
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_end_io = log_end_io;
+ bio->bi_private = lc;
+
+ for (i = 0; i < bio_pages; i++) {
+ pg_datalen = min_t(int, datalen, PAGE_SIZE);
+ pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize);
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ DMERR("Couldn't alloc inline data page");
+ goto error_bio;
+ }
+
+ ptr = kmap_local_page(page);
+ memcpy(ptr, data, pg_datalen);
+ if (pg_sectorlen > pg_datalen)
+ memset(ptr + pg_datalen, 0, pg_sectorlen - pg_datalen);
+ kunmap_local(ptr);
+
+ ret = bio_add_page(bio, page, pg_sectorlen, 0);
+ if (ret != pg_sectorlen) {
+ DMERR("Couldn't add page of inline data");
+ __free_page(page);
+ goto error_bio;
+ }
+
+ datalen -= pg_datalen;
+ data += pg_datalen;
+ }
+ submit_bio(bio);
+
+ sector += bio_pages * PAGE_SECTORS;
+ }
+ return 0;
+error_bio:
+ bio_free_pages(bio);
+ bio_put(bio);
+ put_io_block(lc);
+ return -1;
+}
+
static int log_one_block(struct log_writes_c *lc,
struct pending_block *block, sector_t sector)
{
struct bio *bio;
struct log_write_entry entry;
- size_t ret;
+ size_t metadatalen, ret;
int i;
entry.sector = cpu_to_le64(block->sector);
entry.nr_sectors = cpu_to_le64(block->nr_sectors);
entry.flags = cpu_to_le64(block->flags);
entry.data_len = cpu_to_le64(block->datalen);
+
+ metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0;
if (write_metadata(lc, &entry, sizeof(entry), block->data,
- block->datalen, sector)) {
+ metadatalen, sector)) {
free_pending_block(lc, block);
return -1;
}
+ sector += dev_to_bio_sectors(lc, 1);
+
+ if (block->datalen && metadatalen == 0) {
+ if (write_inline_data(lc, &entry, sizeof(entry), block->data,
+ block->datalen, sector)) {
+ free_pending_block(lc, block);
+ return -1;
+ }
+ /* we don't support both inline data & bio data */
+ goto out;
+ }
+
if (!block->vec_cnt)
goto out;
- sector++;
atomic_inc(&lc->io_blocks);
- bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt),
+ REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
for (i = 0; i < block->vec_cnt; i++) {
/*
@@ -278,17 +368,13 @@ static int log_one_block(struct log_writes_c *lc,
if (ret != block->vecs[i].bv_len) {
atomic_inc(&lc->io_blocks);
submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
- if (!bio) {
- DMERR("Couldn't alloc log bio");
- goto error;
- }
+ bio = bio_alloc(lc->logdev->bdev,
+ bio_max_segs(block->vec_cnt - i),
+ REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, block->vecs[i].bv_page,
block->vecs[i].bv_len, 0);
@@ -321,22 +407,29 @@ static int log_super(struct log_writes_c *lc)
super.nr_entries = cpu_to_le64(lc->logged_entries);
super.sectorsize = cpu_to_le32(lc->sectorsize);
- if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
+ if (write_metadata(lc, &super, sizeof(super), NULL, 0,
+ WRITE_LOG_SUPER_SECTOR)) {
DMERR("Couldn't write super");
return -1;
}
+ /*
+ * Super sector should be written in-order, otherwise the
+ * nr_entries could be rewritten incorrectly by an old bio.
+ */
+ wait_for_completion_io(&lc->super_done);
+
return 0;
}
static inline sector_t logdev_last_sector(struct log_writes_c *lc)
{
- return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT;
+ return bdev_nr_sectors(lc->logdev->bdev);
}
static int log_writes_kthread(void *arg)
{
- struct log_writes_c *lc = (struct log_writes_c *)arg;
+ struct log_writes_c *lc = arg;
sector_t sector = 0;
while (!kthread_should_stop()) {
@@ -354,10 +447,9 @@ static int log_writes_kthread(void *arg)
goto next;
sector = lc->next_sector;
- if (block->flags & LOG_DISCARD_FLAG)
- lc->next_sector++;
- else
- lc->next_sector += block->nr_sectors + 1;
+ if (!(block->flags & LOG_DISCARD_FLAG))
+ lc->next_sector += dev_to_bio_sectors(lc, block->nr_sectors);
+ lc->next_sector += dev_to_bio_sectors(lc, 1);
/*
* Apparently the size of the device may not be known
@@ -399,7 +491,7 @@ next:
if (!try_to_freeze()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
- !atomic_read(&lc->pending_blocks))
+ list_empty(&lc->logging_blocks))
schedule();
__set_current_state(TASK_RUNNING);
}
@@ -435,7 +527,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&lc->unflushed_blocks);
INIT_LIST_HEAD(&lc->logging_blocks);
init_waitqueue_head(&lc->wait);
- lc->sectorsize = 1 << SECTOR_SHIFT;
+ init_completion(&lc->super_done);
atomic_set(&lc->io_blocks, 0);
atomic_set(&lc->pending_blocks, 0);
@@ -455,6 +547,8 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ lc->sectorsize = bdev_logical_block_size(lc->dev->bdev);
+ lc->sectorshift = ilog2(lc->sectorsize);
lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
if (IS_ERR(lc->log_kthread)) {
ret = PTR_ERR(lc->log_kthread);
@@ -464,8 +558,12 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- /* We put the super at sector 0, start logging at sector 1 */
- lc->next_sector = 1;
+ /*
+ * next_sector is in 512b sectors to correspond to what bi_sector expects.
+ * The super starts at sector 0, and the next_sector is the next logical
+ * one based on the sectorsize of the device.
+ */
+ lc->next_sector = lc->sectorsize >> SECTOR_SHIFT;
lc->logging_enabled = true;
lc->end_sector = logdev_last_sector(lc);
lc->device_supports_discard = true;
@@ -494,7 +592,7 @@ static int log_mark(struct log_writes_c *lc, char *data)
return -ENOMEM;
}
- block->data = kstrndup(data, maxsize, GFP_KERNEL);
+ block->data = kstrndup(data, maxsize - 1, GFP_KERNEL);
if (!block->data) {
DMERR("Error copying mark data");
kfree(block);
@@ -539,7 +637,7 @@ static void normal_map_bio(struct dm_target *ti, struct bio *bio)
{
struct log_writes_c *lc = ti->private;
- bio->bi_bdev = lc->dev->bdev;
+ bio_set_dev(bio, lc->dev->bdev);
}
static int log_writes_map(struct dm_target *ti, struct bio *bio)
@@ -554,6 +652,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
bool flush_bio = (bio->bi_opf & REQ_PREFLUSH);
bool fua_bio = (bio->bi_opf & REQ_FUA);
bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
+ bool meta_bio = (bio->bi_opf & REQ_META);
pb->block = NULL;
@@ -578,7 +677,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
if (discard_bio)
alloc_size = sizeof(struct pending_block);
else
- alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio);
+ alloc_size = struct_size(block, vecs, bio_segments(bio));
block = kzalloc(alloc_size, GFP_NOIO);
if (!block) {
@@ -598,9 +697,11 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
block->flags |= LOG_FUA_FLAG;
if (discard_bio)
block->flags |= LOG_DISCARD_FLAG;
+ if (meta_bio)
+ block->flags |= LOG_METADATA_FLAG;
- block->sector = bio->bi_iter.bi_sector;
- block->nr_sectors = bio_sectors(bio);
+ block->sector = bio_to_dev_sectors(lc, bio->bi_iter.bi_sector);
+ block->nr_sectors = bio_to_dev_sectors(lc, bio_sectors(bio));
/* We don't need the data, just submit */
if (discard_bio) {
@@ -630,7 +731,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
*/
bio_for_each_segment(bv, bio, iter) {
struct page *page;
- void *src, *dst;
+ void *dst;
page = alloc_page(GFP_NOIO);
if (!page) {
@@ -642,11 +743,9 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_KILL;
}
- src = kmap_atomic(bv.bv_page);
- dst = kmap_atomic(page);
- memcpy(dst, src + bv.bv_offset, bv.bv_len);
- kunmap_atomic(dst);
- kunmap_atomic(src);
+ dst = kmap_local_page(page);
+ memcpy_from_bvec(dst, &bv);
+ kunmap_local(dst);
block->vecs[i].bv_page = page;
block->vecs[i].bv_len = bv.bv_len;
block->vec_cnt++;
@@ -694,10 +793,10 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio,
* INFO format: <logged entries> <highest allocated sector>
*/
static void log_writes_status(struct dm_target *ti, status_type_t type,
- unsigned status_flags, char *result,
- unsigned maxlen)
+ unsigned int status_flags, char *result,
+ unsigned int maxlen)
{
- unsigned sz = 0;
+ unsigned int sz = 0;
struct log_writes_c *lc = ti->private;
switch (type) {
@@ -711,11 +810,17 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
case STATUSTYPE_TABLE:
DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
break;
+
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
static int log_writes_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+ struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
@@ -724,7 +829,7 @@ static int log_writes_prepare_ioctl(struct dm_target *ti,
/*
* Only pass ioctls through if the device sizes match exactly.
*/
- if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
+ if (ti->len != bdev_nr_sectors(dev->bdev))
return 1;
return 0;
}
@@ -742,7 +847,8 @@ static int log_writes_iterate_devices(struct dm_target *ti,
* Messages supported:
* mark <mark data> - specify the marked data.
*/
-static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
+static int log_writes_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result, unsigned int maxlen)
{
int r = -EINVAL;
struct log_writes_c *lc = ti->private;
@@ -763,18 +869,62 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct log_writes_c *lc = ti->private;
- struct request_queue *q = bdev_get_queue(lc->dev->bdev);
- if (!q || !blk_queue_discard(q)) {
+ if (!bdev_max_discard_sectors(lc->dev->bdev)) {
lc->device_supports_discard = false;
- limits->discard_granularity = 1 << SECTOR_SHIFT;
- limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
+ limits->discard_granularity = lc->sectorsize;
+ limits->max_hw_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
}
+ limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev);
+ limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev);
+ limits->io_min = limits->physical_block_size;
+ limits->dma_alignment = limits->logical_block_size - 1;
}
+#if IS_ENABLED(CONFIG_FS_DAX)
+static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
+ pgoff_t *pgoff)
+{
+ struct log_writes_c *lc = ti->private;
+
+ *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT);
+ return lc->dev->dax_dev;
+}
+
+static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ unsigned long *pfn)
+{
+ struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
+
+ return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
+}
+
+static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+ size_t nr_pages)
+{
+ struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
+
+ return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
+}
+
+static size_t log_writes_dax_recovery_write(struct dm_target *ti,
+ pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
+
+ return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
+}
+
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_zero_page_range NULL
+#define log_writes_dax_recovery_write NULL
+#endif
+
static struct target_type log_writes_target = {
.name = "log-writes",
- .version = {1, 0, 0},
+ .version = {1, 1, 0},
.module = THIS_MODULE,
.ctr = log_writes_ctr,
.dtr = log_writes_dtr,
@@ -785,25 +935,11 @@ static struct target_type log_writes_target = {
.message = log_writes_message,
.iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints,
+ .direct_access = log_writes_dax_direct_access,
+ .dax_zero_page_range = log_writes_dax_zero_page_range,
+ .dax_recovery_write = log_writes_dax_recovery_write,
};
-
-static int __init dm_log_writes_init(void)
-{
- int r = dm_register_target(&log_writes_target);
-
- if (r < 0)
- DMERR("register failed %d", r);
-
- return r;
-}
-
-static void __exit dm_log_writes_exit(void)
-{
- dm_unregister_target(&log_writes_target);
-}
-
-module_init(dm_log_writes_init);
-module_exit(dm_log_writes_exit);
+module_dm(log_writes);
MODULE_DESCRIPTION(DM_NAME " log writes target");
MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>");