summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-01-12 15:46:11 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-01-12 15:46:11 -0800
commit 3acbdbf42e943d85174401357a6b6243479d4c76 (patch)
tree 94d092eedc0e24f611a14a4fcceb9d3643b7ac25 /fs
parent 8834147f9505661859ce44549bf601e2a06bba7c (diff)
parent 9e05e95ca8dae8de4a7a1645014e1bbd9c8a4dab (diff)
Merge tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull dax and libnvdimm updates from Dan Williams:
 "The bulk of this is a rework of the dax_operations API after discovering the obstacles it posed to the work-in-progress DAX+reflink support for XFS and other copy-on-write filesystem mechanics.

  Primarily the need to plumb a block_device through the API to handle partition offsets was a sticking point and Christoph untangled that dependency in addition to other cleanups to make landing the DAX+reflink support easier.

  The DAX_PMEM_COMPAT option has been around for 4 years and not only are distributions shipping userspace that understands the current configuration API, but some are not even bothering to turn this option on anymore, so it seems a good time to remove it per the deprecation schedule. Recall that this was added after the device-dax subsystem moved from /sys/class/dax to /sys/bus/dax for its sysfs organization. All recent functionality depends on /sys/bus/dax.

  Some other miscellaneous cleanups and reflink prep patches are included as well.

  Summary:

   - Simplify the dax_operations API:

      - Eliminate bdev_dax_pgoff() in favor of the filesystem maintaining and applying a partition offset to all its DAX iomap operations.

      - Remove wrappers and device-mapper stacked callbacks for ->copy_from_iter() and ->copy_to_iter() in favor of moving block_device relative offset responsibility to the dax_direct_access() caller.

      - Remove the need for an @bdev in filesystem-DAX infrastructure

      - Remove unused uio helpers copy_from_iter_flushcache() and copy_mc_to_iter() as only the non-check_copy_size() versions are used for DAX.

   - Prepare XFS for the pending (next merge window) DAX+reflink support

   - Remove deprecated DEV_DAX_PMEM_COMPAT support

   - Clean up a straggling misuse of the GUID API"

* tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (38 commits)
  iomap: Fix error handling in iomap_zero_iter()
  ACPI: NFIT: Import GUID before use
  dax: remove the copy_from_iter and copy_to_iter methods
  dax: remove the DAXDEV_F_SYNC flag
  dax: simplify dax_synchronous and set_dax_synchronous
  uio: remove copy_from_iter_flushcache() and copy_mc_to_iter()
  iomap: turn the byte variable in iomap_zero_iter into a ssize_t
  memremap: remove support for external pgmap refcounts
  fsdax: don't require CONFIG_BLOCK
  iomap: build the block based code conditionally
  dax: fix up some of the block device related ifdefs
  fsdax: shift partition offset handling into the file systems
  dax: return the partition offset from fs_dax_get_by_bdev
  iomap: add a IOMAP_DAX flag
  xfs: pass the mapping flags to xfs_bmbt_to_iomap
  xfs: use xfs_direct_write_iomap_ops for DAX zeroing
  xfs: move dax device handling into xfs_{alloc,free}_buftarg
  ext4: cleanup the dax handling in ext4_fill_super
  ext2: cleanup the dax handling in ext2_fill_super
  fsdax: decouple zeroing from the iomap buffered I/O code
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 8
-rw-r--r-- fs/dax.c 157
-rw-r--r-- fs/erofs/data.c 11
-rw-r--r-- fs/erofs/internal.h 3
-rw-r--r-- fs/erofs/super.c 15
-rw-r--r-- fs/ext2/ext2.h 1
-rw-r--r-- fs/ext2/inode.c 15
-rw-r--r-- fs/ext2/super.c 16
-rw-r--r-- fs/ext4/ext4.h 1
-rw-r--r-- fs/ext4/inode.c 25
-rw-r--r-- fs/ext4/super.c 11
-rw-r--r-- fs/fuse/Kconfig 2
-rw-r--r-- fs/fuse/virtio_fs.c 18
-rw-r--r-- fs/iomap/Makefile 4
-rw-r--r-- fs/iomap/buffered-io.c 10
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 4
-rw-r--r-- fs/xfs/xfs_aops.c 2
-rw-r--r-- fs/xfs/xfs_bmap_util.c 7
-rw-r--r-- fs/xfs/xfs_buf.c 8
-rw-r--r-- fs/xfs/xfs_buf.h 5
-rw-r--r-- fs/xfs/xfs_file.c 3
-rw-r--r-- fs/xfs/xfs_iomap.c 84
-rw-r--r-- fs/xfs/xfs_iomap.h 12
-rw-r--r-- fs/xfs/xfs_iops.c 7
-rw-r--r-- fs/xfs/xfs_pnfs.c 4
-rw-r--r-- fs/xfs/xfs_reflink.c 3
-rw-r--r-- fs/xfs/xfs_super.c 80
27 files changed, 288 insertions, 228 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a6313a969bc5..7a2b11c0b803 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -15,11 +15,11 @@ config VALIDATE_FS_PARSER
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
-if BLOCK
-
config FS_IOMAP
bool
+if BLOCK
+
source "fs/ext2/Kconfig"
source "fs/ext4/Kconfig"
source "fs/jbd2/Kconfig"
@@ -42,6 +42,8 @@ source "fs/nilfs2/Kconfig"
source "fs/f2fs/Kconfig"
source "fs/zonefs/Kconfig"
+endif # BLOCK
+
config FS_DAX
bool "File system based Direct Access (DAX) support"
depends on MMU
@@ -89,8 +91,6 @@ config FS_DAX_PMD
config FS_DAX_LIMITED
bool
-endif # BLOCK
-
# Posix ACL utility routines
#
# Note: Posix ACLs can be implemented without these helpers. Never use
diff --git a/fs/dax.c b/fs/dax.c
index 4e3e5a283a91..cd03485867a7 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -709,26 +709,26 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
return __dax_invalidate_entry(mapping, index, false);
}
-static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
- sector_t sector, struct page *to, unsigned long vaddr)
+static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
{
+ return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
+}
+
+static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
+{
+ pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
void *vto, *kaddr;
- pgoff_t pgoff;
long rc;
int id;
- rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
- if (rc)
- return rc;
-
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+ rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
}
- vto = kmap_atomic(to);
- copy_user_page(vto, (void __force *)kaddr, vaddr, to);
+ vto = kmap_atomic(vmf->cow_page);
+ copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
kunmap_atomic(vto);
dax_read_unlock(id);
return 0;
@@ -1005,22 +1005,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
-static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
-{
- return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
-}
-
static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
pfn_t *pfnp)
{
- const sector_t sector = dax_iomap_sector(iomap, pos);
- pgoff_t pgoff;
+ pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
int id, rc;
long length;
- rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
- if (rc)
- return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
NULL, pfnp);
@@ -1126,42 +1117,87 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
}
#endif /* CONFIG_FS_DAX_PMD */
-s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
+static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned int offset, size_t size)
{
- sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
- pgoff_t pgoff;
- long rc, id;
void *kaddr;
- bool page_aligned = false;
- unsigned offset = offset_in_page(pos);
- unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+ long ret;
- if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
- (size == PAGE_SIZE))
- page_aligned = true;
+ ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+ if (ret > 0) {
+ memset(kaddr + offset, 0, size);
+ dax_flush(dax_dev, kaddr + offset, size);
+ }
+ return ret;
+}
- rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
- if (rc)
- return rc;
+static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
+{
+ const struct iomap *iomap = &iter->iomap;
+ const struct iomap *srcmap = iomap_iter_srcmap(iter);
+ loff_t pos = iter->pos;
+ u64 length = iomap_length(iter);
+ s64 written = 0;
+
+ /* already zeroed? we're done. */
+ if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+ return length;
+
+ do {
+ unsigned offset = offset_in_page(pos);
+ unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+ pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
+ long rc;
+ int id;
+
+ id = dax_read_lock();
+ if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
+ rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
+ else
+ rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
+ dax_read_unlock(id);
- id = dax_read_lock();
+ if (rc < 0)
+ return rc;
+ pos += size;
+ length -= size;
+ written += size;
+ if (did_zero)
+ *did_zero = true;
+ } while (length > 0);
- if (page_aligned)
- rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
- else
- rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
- if (rc < 0) {
- dax_read_unlock(id);
- return rc;
- }
+ return written;
+}
- if (!page_aligned) {
- memset(kaddr + offset, 0, size);
- dax_flush(iomap->dax_dev, kaddr + offset, size);
- }
- dax_read_unlock(id);
- return size;
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+ const struct iomap_ops *ops)
+{
+ struct iomap_iter iter = {
+ .inode = inode,
+ .pos = pos,
+ .len = len,
+ .flags = IOMAP_DAX | IOMAP_ZERO,
+ };
+ int ret;
+
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = dax_zero_iter(&iter, did_zero);
+ return ret;
}
+EXPORT_SYMBOL_GPL(dax_zero_range);
+
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+ const struct iomap_ops *ops)
+{
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int off = pos & (blocksize - 1);
+
+ /* Block boundary? Nothing to do */
+ if (!off)
+ return 0;
+ return dax_zero_range(inode, pos, blocksize - off, did_zero, ops);
+}
+EXPORT_SYMBOL_GPL(dax_truncate_page);
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
struct iov_iter *iter)
@@ -1169,7 +1205,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
const struct iomap *iomap = &iomi->iomap;
loff_t length = iomap_length(iomi);
loff_t pos = iomi->pos;
- struct block_device *bdev = iomap->bdev;
struct dax_device *dax_dev = iomap->dax_dev;
loff_t end = pos + length, done = 0;
ssize_t ret = 0;
@@ -1203,9 +1238,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
while (pos < end) {
unsigned offset = pos & (PAGE_SIZE - 1);
const size_t size = ALIGN(length + offset, PAGE_SIZE);
- const sector_t sector = dax_iomap_sector(iomap, pos);
+ pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
ssize_t map_len;
- pgoff_t pgoff;
void *kaddr;
if (fatal_signal_pending(current)) {
@@ -1213,10 +1247,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
break;
}
- ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
- if (ret)
- break;
-
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
&kaddr, NULL);
if (map_len < 0) {
@@ -1230,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
if (map_len > end - pos)
map_len = end - pos;
- /*
- * The userspace address for the memory copy has already been
- * validated via access_ok() in either vfs_read() or
- * vfs_write(), depending on which operation we are doing.
- */
if (iov_iter_rw(iter) == WRITE)
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
@@ -1274,6 +1299,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
.inode = iocb->ki_filp->f_mapping->host,
.pos = iocb->ki_pos,
.len = iov_iter_count(iter),
+ .flags = IOMAP_DAX,
};
loff_t done = 0;
int ret;
@@ -1332,19 +1358,16 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
const struct iomap_iter *iter)
{
- sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
- unsigned long vaddr = vmf->address;
vm_fault_t ret;
int error = 0;
switch (iter->iomap.type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
- clear_user_highpage(vmf->cow_page, vaddr);
+ clear_user_highpage(vmf->cow_page, vmf->address);
break;
case IOMAP_MAPPED:
- error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
- sector, vmf->cow_page, vaddr);
+ error = copy_cow_page_dax(vmf, iter);
break;
default:
WARN_ON_ONCE(1);
@@ -1430,7 +1453,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
.inode = mapping->host,
.pos = (loff_t)vmf->pgoff << PAGE_SHIFT,
.len = PAGE_SIZE,
- .flags = IOMAP_FAULT,
+ .flags = IOMAP_DAX | IOMAP_FAULT,
};
vm_fault_t ret = 0;
void *entry;
@@ -1539,7 +1562,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
struct iomap_iter iter = {
.inode = mapping->host,
.len = PMD_SIZE,
- .flags = IOMAP_FAULT,
+ .flags = IOMAP_DAX | IOMAP_FAULT,
};
vm_fault_t ret = VM_FAULT_FALLBACK;
pgoff_t max_pgoff;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index e18476c85fa2..fa7ddb7ad980 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -192,6 +192,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/* primary device by default */
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
+ map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
if (map->m_deviceid) {
down_read(&devs->rwsem);
@@ -202,6 +203,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
}
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
+ map->m_dax_part_off = dif->dax_part_off;
up_read(&devs->rwsem);
} else if (devs->extra_devices) {
down_read(&devs->rwsem);
@@ -218,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
map->m_pa -= startoff;
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
+ map->m_dax_part_off = dif->dax_part_off;
break;
}
}
@@ -248,9 +251,13 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret)
return ret;
- iomap->bdev = mdev.m_bdev;
- iomap->dax_dev = mdev.m_daxdev;
iomap->offset = map.m_la;
+ if (flags & IOMAP_DAX) {
+ iomap->dax_dev = mdev.m_daxdev;
+ iomap->offset += mdev.m_dax_part_off;
+ } else {
+ iomap->bdev = mdev.m_bdev;
+ }
iomap->length = map.m_llen;
iomap->flags = 0;
iomap->private = NULL;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 3db494a398b2..b8272fb95fd6 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -51,6 +51,7 @@ struct erofs_device_info {
char *path;
struct block_device *bdev;
struct dax_device *dax_dev;
+ u64 dax_part_off;
u32 blocks;
u32 mapped_blkaddr;
@@ -115,6 +116,7 @@ struct erofs_sb_info {
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
+ u64 dax_part_off;
u64 total_blocks;
u32 primarydevice_blocks;
@@ -467,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_dev {
struct block_device *m_bdev;
struct dax_device *m_daxdev;
+ u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 5c137647fa8a..915eefe0d7e2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -267,7 +267,7 @@ static int erofs_init_devices(struct super_block *sb,
break;
}
dif->bdev = bdev;
- dif->dax_dev = fs_dax_get_by_bdev(bdev);
+ dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
dif->blocks = le32_to_cpu(dis->blocks);
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
sbi->total_blocks += dif->blocks;
@@ -597,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+ sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
sbi->devs = ctx->devs;
ctx->devs = NULL;
@@ -605,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- if (test_opt(&sbi->opt, DAX_ALWAYS) &&
- !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
- errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
- clear_opt(&sbi->opt, DAX_ALWAYS);
+ if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+ BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
+
+ if (!sbi->dax_dev) {
+ errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+ clear_opt(&sbi->opt, DAX_ALWAYS);
+ }
}
sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_maxbytes = MAX_LFS_FILESIZE;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3be9dd6412b7..d4f306aa5ace 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,6 +118,7 @@ struct ext2_sb_info {
spinlock_t s_lock;
struct mb_cache *s_ea_block_cache;
struct dax_device *s_daxdev;
+ u64 s_dax_part_off;
};
static inline spinlock_t *
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 333fa62661d5..602578b72d8c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -36,6 +36,7 @@
#include <linux/iomap.h>
#include <linux/namei.h>
#include <linux/uio.h>
+#include <linux/dax.h>
#include "ext2.h"
#include "acl.h"
#include "xattr.h"
@@ -816,9 +817,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return ret;
iomap->flags = 0;
- iomap->bdev = inode->i_sb->s_bdev;
iomap->offset = (u64)first_block << blkbits;
- iomap->dax_dev = sbi->s_daxdev;
+ if (flags & IOMAP_DAX)
+ iomap->dax_dev = sbi->s_daxdev;
+ else
+ iomap->bdev = inode->i_sb->s_bdev;
if (ret == 0) {
iomap->type = IOMAP_HOLE;
@@ -827,6 +830,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = (u64)bno << blkbits;
+ if (flags & IOMAP_DAX)
+ iomap->addr += sbi->s_dax_part_off;
iomap->length = (u64)ret << blkbits;
iomap->flags |= IOMAP_F_MERGED;
}
@@ -1297,9 +1302,9 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
if (IS_DAX(inode)) {
- error = iomap_zero_range(inode, newsize,
- PAGE_ALIGN(newsize) - newsize, NULL,
- &ext2_iomap_ops);
+ error = dax_zero_range(inode, newsize,
+ PAGE_ALIGN(newsize) - newsize, NULL,
+ &ext2_iomap_ops);
} else if (test_opt(inode->i_sb, NOBH))
error = nobh_truncate_page(inode->i_mapping,
newsize, ext2_get_block);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8d580b609ba..94f1fbd7d3ac 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -802,7 +802,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
- struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
struct buffer_head * bh;
struct ext2_sb_info * sbi;
struct ext2_super_block * es;
@@ -822,17 +821,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- goto failed;
+ return -ENOMEM;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) {
kfree(sbi);
- goto failed;
+ return -ENOMEM;
}
sb->s_fs_info = sbi;
sbi->s_sb_block = sb_block;
- sbi->s_daxdev = dax_dev;
+ sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
spin_lock_init(&sbi->s_lock);
ret = -EINVAL;
@@ -946,11 +945,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (test_opt(sb, DAX)) {
- if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
- bdev_nr_sectors(sb->s_bdev))) {
+ if (!sbi->s_daxdev) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
clear_opt(sbi->s_mount_opt, DAX);
+ } else if (blocksize != PAGE_SIZE) {
+ ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+ clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1199,11 +1200,10 @@ failed_mount_group_desc:
failed_mount:
brelse(bh);
failed_sbi:
+ fs_put_dax(sbi->s_daxdev);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
-failed:
- fs_put_dax(dax_dev);
return ret;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 715ee206dfe1..71a3cdceaa03 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1699,6 +1699,7 @@ struct ext4_sb_info {
*/
struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
+ u64 s_dax_part_off;
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9dbeb772de60..5f79d265d06a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,7 @@
#include <linux/bitops.h>
#include <linux/iomap.h>
#include <linux/iversion.h>
+#include <linux/dax.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -3253,7 +3254,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
struct ext4_map_blocks *map, loff_t offset,
- loff_t length)
+ loff_t length, unsigned int flags)
{
u8 blkbits = inode->i_blkbits;
@@ -3270,8 +3271,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
if (map->m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
- iomap->bdev = inode->i_sb->s_bdev;
- iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+ if (flags & IOMAP_DAX)
+ iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+ else
+ iomap->bdev = inode->i_sb->s_bdev;
iomap->offset = (u64) map->m_lblk << blkbits;
iomap->length = (u64) map->m_len << blkbits;
@@ -3291,9 +3294,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
if (map->m_flags & EXT4_MAP_UNWRITTEN) {
iomap->type = IOMAP_UNWRITTEN;
iomap->addr = (u64) map->m_pblk << blkbits;
+ if (flags & IOMAP_DAX)
+ iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
} else if (map->m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
iomap->addr = (u64) map->m_pblk << blkbits;
+ if (flags & IOMAP_DAX)
+ iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
@@ -3330,8 +3337,8 @@ retry:
* DAX and direct I/O are the only two operations that are currently
* supported with IOMAP_WRITE.
*/
- WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
- if (IS_DAX(inode))
+ WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
+ if (flags & IOMAP_DAX)
m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
/*
* We use i_size instead of i_disksize here because delalloc writeback
@@ -3402,7 +3409,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret < 0)
return ret;
out:
- ext4_set_iomap(inode, iomap, &map, offset, length);
+ ext4_set_iomap(inode, iomap, &map, offset, length, flags);
return 0;
}
@@ -3522,7 +3529,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
delalloc = ext4_iomap_is_delalloc(inode, &map);
set_iomap:
- ext4_set_iomap(inode, iomap, &map, offset, length);
+ ext4_set_iomap(inode, iomap, &map, offset, length, flags);
if (delalloc && iomap->type == IOMAP_HOLE)
iomap->type = IOMAP_DELALLOC;
@@ -3762,8 +3769,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
length = max;
if (IS_DAX(inode)) {
- return iomap_zero_range(inode, from, length, NULL,
- &ext4_iomap_ops);
+ return dax_zero_range(inode, from, length, NULL,
+ &ext4_iomap_ops);
}
return __ext4_block_zero_page_range(handle, mapping, from, length);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9a936ecbaa3b..0343f682504d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4338,7 +4338,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
if (!sbi)
return NULL;
- sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev);
+ sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
@@ -4756,9 +4756,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount;
}
- if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0,
- bdev_nr_sectors(sb->s_bdev)))
- set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+ if (sbi->s_daxdev) {
+ if (blocksize == PAGE_SIZE)
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+ else
+ ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+ }
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 40ce9a1c12e5..038ed0b9aaa5 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -45,7 +45,7 @@ config FUSE_DAX
select INTERVAL_TREE
depends on VIRTIO_FS
depends on FS_DAX
- depends on DAX_DRIVER
+ depends on DAX
help
This allows bypassing guest page cache and allows mapping host page
cache directly in guest address space.
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index e54dc069587d..74e627109b79 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -765,20 +765,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
-static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
- pgoff_t pgoff, void *addr,
- size_t bytes, struct iov_iter *i)
-{
- return copy_from_iter(addr, bytes, i);
-}
-
-static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
- pgoff_t pgoff, void *addr,
- size_t bytes, struct iov_iter *i)
-{
- return copy_to_iter(addr, bytes, i);
-}
-
static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
pgoff_t pgoff, size_t nr_pages)
{
@@ -795,8 +781,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
static const struct dax_operations virtio_fs_dax_ops = {
.direct_access = virtio_fs_direct_access,
- .copy_from_iter = virtio_fs_copy_from_iter,
- .copy_to_iter = virtio_fs_copy_to_iter,
.zero_page_range = virtio_fs_zero_page_range,
};
@@ -862,7 +846,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
- fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
+ fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
if (IS_ERR(fs->dax_dev))
return PTR_ERR(fs->dax_dev);
diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index 4143a3ff89db..fc070184b7fa 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += trace.o \
- buffered-io.o \
+ iter.o
+iomap-$(CONFIG_BLOCK) += buffered-io.o \
direct-io.o \
fiemap.o \
- iter.o \
seek.o
iomap-$(CONFIG_SWAP) += swapfile.o
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c6b3a148e898..c938bbad075e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -897,7 +897,6 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
- struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
@@ -913,14 +912,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
- if (IS_DAX(iter->inode)) {
- s64 tmp = dax_iomap_zero(pos, bytes, iomap);
- if (tmp < 0)
- return tmp;
- bytes = tmp;
- goto good;
- }
-
status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
return status;
@@ -933,7 +924,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_mark_accessed(folio);
bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
-good:
if (WARN_ON_ONCE(bytes == 0))
return -EIO;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4dccd4d90622..74198dd82b03 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4551,7 +4551,7 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
@@ -4598,7 +4598,7 @@ xfs_bmapi_convert_delalloc(
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
- xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
*seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4098a9875c5b..2705f91bdd0d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -359,7 +359,7 @@ retry:
isnullstartblock(imap.br_startblock))
goto allocate_blocks;
- xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 73a36b7be3bd..797ea0c8b14e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1001,7 +1001,7 @@ xfs_free_file_space(
/*
* Now that we've unmap all full blocks we'll have to zero out any
- * partial block at the beginning and/or end. iomap_zero_range is smart
+ * partial block at the beginning and/or end. xfs_zero_range is smart
* enough to skip any holes, including those we just created, but we
* must take care not to zero beyond EOF and enlarge i_size.
*/
@@ -1009,15 +1009,14 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
- error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
- &xfs_buffered_write_iomap_ops);
+ error = xfs_zero_range(ip, offset, len, NULL);
if (error)
return error;
/*
* If we zeroed right up to EOF and EOF straddles a page boundary we
* must make sure that the post-EOF area is also zeroed because the
- * page could be mmap'd and iomap_zero_range doesn't do that for us.
+ * page could be mmap'd and xfs_zero_range doesn't do that for us.
* Writeback of the eof page will do this, albeit clumsily.
*/
if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 631c5a61d89b..bbb0fbd34e64 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1892,6 +1892,7 @@ xfs_free_buftarg(
list_lru_destroy(&btp->bt_lru);
blkdev_issue_flush(btp->bt_bdev);
+ fs_put_dax(btp->bt_daxdev);
kmem_free(btp);
}
@@ -1932,11 +1933,10 @@ xfs_setsize_buftarg_early(
return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}
-xfs_buftarg_t *
+struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
- struct block_device *bdev,
- struct dax_device *dax_dev)
+ struct block_device *bdev)
{
xfs_buftarg_t *btp;
@@ -1945,7 +1945,7 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
- btp->bt_daxdev = dax_dev;
+ btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6b0200b8007d..edcb6254fa6a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -89,6 +89,7 @@ typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
+ u64 bt_dax_part_off;
struct xfs_mount *bt_mount;
unsigned int bt_meta_sectorsize;
size_t bt_meta_sectormask;
@@ -338,8 +339,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
/*
* Handling of buftargs.
*/
-extern struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *,
- struct block_device *, struct dax_device *);
+struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
+ struct block_device *bdev);
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 27594738b0d1..8d4c5ca261bd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -437,8 +437,7 @@ restart:
}
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
- error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
- NULL, &xfs_buffered_write_iomap_ops);
+ error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
if (error)
return error;
} else
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 093758440ad5..e552ce541ec2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -28,7 +28,6 @@
#include "xfs_dquot.h"
#include "xfs_reflink.h"
-
#define XFS_ALLOC_ALIGN(mp, off) \
(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
@@ -54,7 +53,8 @@ xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap,
- u16 flags)
+ unsigned int mapping_flags,
+ u16 iomap_flags)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
@@ -71,16 +71,22 @@ xfs_bmbt_to_iomap(
iomap->type = IOMAP_DELALLOC;
} else {
iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
+ if (mapping_flags & IOMAP_DAX)
+ iomap->addr += target->bt_dax_part_off;
+
if (imap->br_state == XFS_EXT_UNWRITTEN)
iomap->type = IOMAP_UNWRITTEN;
else
iomap->type = IOMAP_MAPPED;
+
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
- iomap->bdev = target->bt_bdev;
- iomap->dax_dev = target->bt_daxdev;
- iomap->flags = flags;
+ if (mapping_flags & IOMAP_DAX)
+ iomap->dax_dev = target->bt_daxdev;
+ else
+ iomap->bdev = target->bt_bdev;
+ iomap->flags = iomap_flags;
if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
@@ -188,6 +194,7 @@ xfs_iomap_write_direct(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
xfs_fileoff_t count_fsb,
+ unsigned int flags,
struct xfs_bmbt_irec *imap)
{
struct xfs_mount *mp = ip->i_mount;
@@ -229,7 +236,7 @@ xfs_iomap_write_direct(
* the reserve block pool for bmbt block allocation if there is no space
* left but we need to do unwritten extent conversion.
*/
- if (IS_DAX(VFS_I(ip))) {
+ if (flags & IOMAP_DAX) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
if (imap->br_state == XFS_EXT_UNWRITTEN) {
force = true;
@@ -620,7 +627,7 @@ imap_needs_alloc(
imap->br_startblock == DELAYSTARTBLOCK)
return true;
/* we convert unwritten extents before copying the data for DAX */
- if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
+ if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
return true;
return false;
}
@@ -800,7 +807,7 @@ xfs_direct_write_iomap_begin(
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
allocate_blocks:
error = -EAGAIN;
@@ -826,23 +833,24 @@ allocate_blocks:
xfs_iunlock(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
- &imap);
+ flags, &imap);
if (error)
return error;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+ iomap_flags | IOMAP_F_NEW);
out_found_cow:
xfs_iunlock(ip, lockmode);
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
if (imap.br_startblock != HOLESTARTBLOCK) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
if (error)
return error;
}
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
out_unlock:
if (lockmode)
@@ -1052,23 +1060,24 @@ retry:
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
found_imap:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
found_cow:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (imap.br_startoff <= offset_fsb) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
if (error)
return error;
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+ IOMAP_F_SHARED);
}
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1177,7 +1186,8 @@ xfs_read_iomap_begin(
if (error)
return error;
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+ shared ? IOMAP_F_SHARED : 0);
}
const struct iomap_ops xfs_read_iomap_ops = {
@@ -1236,7 +1246,8 @@ xfs_seek_iomap_begin(
if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+ error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+ IOMAP_F_SHARED);
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
@@ -1258,7 +1269,7 @@ xfs_seek_iomap_begin(
imap.br_state = XFS_EXT_NORM;
done:
xfs_trim_extent(&imap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
@@ -1305,9 +1316,40 @@ out_unlock:
if (error)
return error;
ASSERT(nimaps);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
}
const struct iomap_ops xfs_xattr_iomap_ops = {
.iomap_begin = xfs_xattr_iomap_begin,
};
+
+int
+xfs_zero_range(
+ struct xfs_inode *ip,
+ loff_t pos,
+ loff_t len,
+ bool *did_zero)
+{
+ struct inode *inode = VFS_I(ip);
+
+ if (IS_DAX(inode))
+ return dax_zero_range(inode, pos, len, did_zero,
+ &xfs_direct_write_iomap_ops);
+ return iomap_zero_range(inode, pos, len, did_zero,
+ &xfs_buffered_write_iomap_ops);
+}
+
+int
+xfs_truncate_page(
+ struct xfs_inode *ip,
+ loff_t pos,
+ bool *did_zero)
+{
+ struct inode *inode = VFS_I(ip);
+
+ if (IS_DAX(inode))
+ return dax_truncate_page(inode, pos, did_zero,
+ &xfs_direct_write_iomap_ops);
+ return iomap_truncate_page(inode, pos, did_zero,
+ &xfs_buffered_write_iomap_ops);
+}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7d3703556d0e..e88dc162c785 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -12,13 +12,19 @@ struct xfs_inode;
struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
- xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
+ xfs_fileoff_t count_fsb, unsigned int flags,
+ struct xfs_bmbt_irec *imap);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
xfs_fileoff_t end_fsb);
-int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
- struct xfs_bmbt_irec *, u16);
+int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
+ struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
+ u16 iomap_flags);
+
+int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
+ bool *did_zero);
+int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero);
static inline xfs_filblks_t
xfs_aligned_fsb_count(
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3447c19e99da..b79b3846e71b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -890,8 +890,8 @@ xfs_setattr_size(
*/
if (newsize > oldsize) {
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
- error = iomap_zero_range(inode, oldsize, newsize - oldsize,
- &did_zeroing, &xfs_buffered_write_iomap_ops);
+ error = xfs_zero_range(ip, oldsize, newsize - oldsize,
+ &did_zeroing);
} else {
/*
* iomap won't detect a dirty page over an unwritten block (or a
@@ -903,8 +903,7 @@ xfs_setattr_size(
newsize);
if (error)
return error;
- error = iomap_truncate_page(inode, newsize, &did_zeroing,
- &xfs_buffered_write_iomap_ops);
+ error = xfs_truncate_page(ip, newsize, &did_zeroing);
}
if (error)
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 5e1d29d8b2e7..d6334abbc0b3 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -155,7 +155,7 @@ xfs_fs_map_blocks(
xfs_iunlock(ip, lock_flags);
error = xfs_iomap_write_direct(ip, offset_fsb,
- end_fsb - offset_fsb, &imap);
+ end_fsb - offset_fsb, 0, &imap);
if (error)
goto out_unlock;
@@ -173,7 +173,7 @@ xfs_fs_map_blocks(
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
*device_generation = mp->m_generation;
return error;
out_unlock:
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8b6c7163f684..db70060e7bf6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1272,8 +1272,7 @@ xfs_reflink_zero_posteof(
return 0;
trace_xfs_zero_eof(ip, isize, pos - isize);
- return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
- &xfs_buffered_write_iomap_ops);
+ return xfs_zero_range(ip, isize, pos - isize, NULL);
}
/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c7ac486ca5d3..e8f37bdc8354 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -331,13 +331,34 @@ xfs_set_inode_alloc(
return xfs_is_inode32(mp) ? maxagi : agcount;
}
-static bool
-xfs_buftarg_is_dax(
- struct super_block *sb,
- struct xfs_buftarg *bt)
+static int
+xfs_setup_dax_always(
+ struct xfs_mount *mp)
{
- return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
- bdev_nr_sectors(bt->bt_bdev));
+ if (!mp->m_ddev_targp->bt_daxdev &&
+ (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
+ xfs_alert(mp,
+ "DAX unsupported by block device. Turning off DAX.");
+ goto disable_dax;
+ }
+
+ if (mp->m_super->s_blocksize != PAGE_SIZE) {
+ xfs_alert(mp,
+ "DAX not supported for blocksize. Turning off DAX.");
+ goto disable_dax;
+ }
+
+ if (xfs_has_reflink(mp)) {
+ xfs_alert(mp, "DAX and reflink cannot be used together!");
+ return -EINVAL;
+ }
+
+ xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ return 0;
+
+disable_dax:
+ xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
+ return 0;
}
STATIC int
@@ -370,26 +391,19 @@ STATIC void
xfs_close_devices(
struct xfs_mount *mp)
{
- struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
-
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
- struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
xfs_free_buftarg(mp->m_logdev_targp);
xfs_blkdev_put(logdev);
- fs_put_dax(dax_logdev);
}
if (mp->m_rtdev_targp) {
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
- struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
xfs_free_buftarg(mp->m_rtdev_targp);
xfs_blkdev_put(rtdev);
- fs_put_dax(dax_rtdev);
}
xfs_free_buftarg(mp->m_ddev_targp);
- fs_put_dax(dax_ddev);
}
/*
@@ -407,8 +421,6 @@ xfs_open_devices(
struct xfs_mount *mp)
{
struct block_device *ddev = mp->m_super->s_bdev;
- struct dax_device *dax_ddev = fs_dax_get_by_bdev(ddev);
- struct dax_device *dax_logdev = NULL, *dax_rtdev = NULL;
struct block_device *logdev = NULL, *rtdev = NULL;
int error;
@@ -418,8 +430,7 @@ xfs_open_devices(
if (mp->m_logname) {
error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error)
- goto out;
- dax_logdev = fs_dax_get_by_bdev(logdev);
+ return error;
}
if (mp->m_rtname) {
@@ -433,25 +444,24 @@ xfs_open_devices(
error = -EINVAL;
goto out_close_rtdev;
}
- dax_rtdev = fs_dax_get_by_bdev(rtdev);
}
/*
* Setup xfs_mount buffer target pointers
*/
error = -ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -467,14 +477,9 @@ xfs_open_devices(
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
xfs_blkdev_put(rtdev);
- fs_put_dax(dax_rtdev);
out_close_logdev:
- if (logdev && logdev != ddev) {
+ if (logdev && logdev != ddev)
xfs_blkdev_put(logdev);
- fs_put_dax(dax_logdev);
- }
- out:
- fs_put_dax(dax_ddev);
return error;
}
@@ -1593,26 +1598,9 @@ xfs_fs_fill_super(
sb->s_flags |= SB_I_VERSION;
if (xfs_has_dax_always(mp)) {
- bool rtdev_is_dax = false, datadev_is_dax;
-
- xfs_warn(mp,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-
- datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
- if (mp->m_rtdev_targp)
- rtdev_is_dax = xfs_buftarg_is_dax(sb,
- mp->m_rtdev_targp);
- if (!rtdev_is_dax && !datadev_is_dax) {
- xfs_alert(mp,
- "DAX unsupported by block device. Turning off DAX.");
- xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
- }
- if (xfs_has_reflink(mp)) {
- xfs_alert(mp,
- "DAX and reflink cannot be used together!");
- error = -EINVAL;
+ error = xfs_setup_dax_always(mp);
+ if (error)
goto out_filestream_unmount;
- }
}
if (xfs_has_discard(mp)) {