From b8a6176c214cf9aa2679131ed7e4515cddaadc33 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:45 +0100 Subject: ext4: Support for synchronous DAX faults We return IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to prepare blocks for writing and the inode has some uncommitted metadata changes. In the fault handler ext4_dax_fault() we then detect this case (through VM_FAULT_NEEDDSYNC return value) and call helper dax_finish_sync_fault() to flush metadata changes and insert page table entry. Note that this will also dirty corresponding radix tree entry which is what we want - fsync(2) will still provide data integrity guarantees for applications not using userspace flushing. And applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext4/inode.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 31db875bc7a1..13a198924a0f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3394,6 +3394,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait) } #ifdef CONFIG_FS_DAX +static bool ext4_inode_datasync_dirty(struct inode *inode) +{ + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + if (journal) + return !jbd2_transaction_committed(journal, + EXT4_I(inode)->i_datasync_tid); + /* Any metadata buffers to write? */ + if (!list_empty(&inode->i_mapping->private_list)) + return true; + return inode->i_state & I_DIRTY_DATASYNC; +} + static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { @@ -3466,6 +3479,8 @@ retry: } iomap->flags = 0; + if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode)) + iomap->flags |= IOMAP_F_DIRTY; iomap->bdev = inode->i_sb->s_bdev; iomap->dax_dev = sbi->s_daxdev; iomap->offset = first_block << blkbits; -- cgit From aaa422c4c3f6ee958ea9d6c9260ac40f90a3f4e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Nov 2017 16:38:44 -0800 Subject: fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core While reviewing whether MAP_SYNC should strengthen its current guarantee of syncing writes from the initiating process to also include third-party readers observing dirty metadata, Dave pointed out that the check of IOMAP_WRITE is misplaced. The policy of what to with IOMAP_F_DIRTY should be separated from the generic filesystem mechanism of reporting dirty metadata. Move this policy to the fs-dax core to simplify the per-filesystem iomap handlers, and further centralize code that implements the MAP_SYNC policy. This otherwise should not change behavior, it just makes it easier to change behavior in the future. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Cc: Ross Zwisler Reported-by: Dave Chinner Signed-off-by: Dan Williams --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 13a198924a0f..ee4d907a4251 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3479,7 +3479,7 @@ retry: } iomap->flags = 0; - if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode)) + if (ext4_inode_datasync_dirty(inode)) iomap->flags |= IOMAP_F_DIRTY; iomap->bdev = inode->i_sb->s_bdev; iomap->dax_dev = sbi->s_daxdev; -- cgit