summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-03-06 11:00:18 +0100
committerChristian Brauner <brauner@kernel.org>2025-03-06 11:00:18 +0100
commitc7be0d72d5514caf0a64763d6c7a5503fb16acf1 (patch)
treede135086708083b658030fa37321bd5616acd373
parent1743d385e704c41ef028697ef44eeab987d5f5a2 (diff)
parent786e3080cbe916f6a4c0987124d4f930c1814a97 (diff)
Merge patch series "iomap preliminaries for large atomic write for xfs with CoW"
Bring in the iomap changes required for xfs to support large atomic write with CoW. * patches from https://lore.kernel.org/r/20250303171120.2837067-1-john.g.garry@oracle.com: iomap: Lift blocksize restriction on atomic writes iomap: Support SW-based atomic writes iomap: Rename IOMAP_ATOMIC -> IOMAP_ATOMIC_HW Link: https://lore.kernel.org/r/20250303171120.2837067-1-john.g.garry@oracle.com Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--Documentation/filesystems/iomap/operations.rst20
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/iomap/direct-io.c20
-rw-r--r--fs/iomap/trace.h2
-rw-r--r--include/linux/iomap.h8
5 files changed, 36 insertions, 16 deletions
diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
index d1535109587a..b08a79d11d9f 100644
--- a/Documentation/filesystems/iomap/operations.rst
+++ b/Documentation/filesystems/iomap/operations.rst
@@ -514,8 +514,8 @@ IOMAP_WRITE`` with any combination of the following enhancements:
if the mapping is unwritten and the filesystem cannot handle zeroing
the unaligned regions without exposing stale contents.
- * ``IOMAP_ATOMIC``: This write is being issued with torn-write
- protection.
+ * ``IOMAP_ATOMIC_HW``: This write is being issued with torn-write
+ protection based on HW-offload support.
Only a single bio can be created for the write, and the write must
not be split into multiple I/O requests, i.e. flag REQ_ATOMIC must be
set.
@@ -526,8 +526,20 @@ IOMAP_WRITE`` with any combination of the following enhancements:
conversion or copy on write), all updates for the entire file range
must be committed atomically as well.
Only one space mapping is allowed per untorn write.
- Untorn writes must be aligned to, and must not be longer than, a
- single file block.
+ Untorn writes may be longer than a single file block. In all cases,
+ the mapping start disk block must have at least the same alignment as
+ the write offset.
+
+ * ``IOMAP_ATOMIC_SW``: This write is being issued with torn-write
+ protection via a software mechanism provided by the filesystem.
+ All the disk block alignment and single bio restrictions which apply
+ to IOMAP_ATOMIC_HW do not apply here.
+ SW-based untorn writes would typically be used as a fallback when
+ HW-based untorn writes may not be issued, e.g. the range of the write
+ covers multiple extents, meaning that it is not possible to issue
+ a single bio.
+ All filesystem metadata updates for the entire file range must be
+ committed atomically as well.
Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
calling this function.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c54ae5fcbd4..ba2f1e3db7c7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3467,7 +3467,7 @@ static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
return false;
/* atomic writes are all-or-nothing */
- if (flags & IOMAP_ATOMIC)
+ if (flags & IOMAP_ATOMIC_HW)
return false;
/* can only try again if we wrote nothing */
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index e1e32e2bb0bf..5299f70428ef 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -317,7 +317,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
* clearing the WRITE_THROUGH flag in the dio request.
*/
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
- const struct iomap *iomap, bool use_fua, bool atomic)
+ const struct iomap *iomap, bool use_fua, bool atomic_hw)
{
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
@@ -329,7 +329,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
opflags |= REQ_FUA;
else
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
- if (atomic)
+ if (atomic_hw)
opflags |= REQ_ATOMIC;
return opflags;
@@ -340,8 +340,8 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
const struct iomap *iomap = &iter->iomap;
struct inode *inode = iter->inode;
unsigned int fs_block_size = i_blocksize(inode), pad;
+ bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW;
const loff_t length = iomap_length(iter);
- bool atomic = iter->flags & IOMAP_ATOMIC;
loff_t pos = iter->pos;
blk_opf_t bio_opf;
struct bio *bio;
@@ -351,7 +351,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
u64 copied = 0;
size_t orig_count;
- if (atomic && length != fs_block_size)
+ if (atomic_hw && length != iter->len)
return -EINVAL;
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
@@ -428,7 +428,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
goto out;
}
- bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
+ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw);
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
do {
@@ -461,7 +461,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
}
n = bio->bi_iter.bi_size;
- if (WARN_ON_ONCE(atomic && n != length)) {
+ if (WARN_ON_ONCE(atomic_hw && n != length)) {
/*
* This bio should have covered the complete length,
* which it doesn't, so error. We may need to zero out
@@ -652,9 +652,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iocb->ki_flags & IOCB_NOWAIT)
iomi.flags |= IOMAP_NOWAIT;
- if (iocb->ki_flags & IOCB_ATOMIC)
- iomi.flags |= IOMAP_ATOMIC;
-
if (iov_iter_rw(iter) == READ) {
/* reads can always complete inline */
dio->flags |= IOMAP_DIO_INLINE_COMP;
@@ -689,6 +686,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
iomi.flags |= IOMAP_OVERWRITE_ONLY;
}
+ if (dio_flags & IOMAP_DIO_ATOMIC_SW)
+ iomi.flags |= IOMAP_ATOMIC_SW;
+ else if (iocb->ki_flags & IOCB_ATOMIC)
+ iomi.flags |= IOMAP_ATOMIC_HW;
+
/* for data sync or sync, we need sync completion processing */
if (iocb_is_dsync(iocb)) {
dio->flags |= IOMAP_DIO_NEED_SYNC;
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 9eab2c8ac3c5..69af89044ebd 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -99,7 +99,7 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_FAULT, "FAULT" }, \
{ IOMAP_DIRECT, "DIRECT" }, \
{ IOMAP_NOWAIT, "NOWAIT" }, \
- { IOMAP_ATOMIC, "ATOMIC" }
+ { IOMAP_ATOMIC_HW, "ATOMIC_HW" }
#define IOMAP_F_FLAGS_STRINGS \
{ IOMAP_F_NEW, "NEW" }, \
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index ea29388b2fba..9cd93530013c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -189,8 +189,9 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
-#define IOMAP_ATOMIC (1 << 9)
+#define IOMAP_ATOMIC_HW (1 << 9) /* HW-based torn-write protection */
#define IOMAP_DONTCACHE (1 << 10)
+#define IOMAP_ATOMIC_SW (1 << 11)/* SW-based torn-write protection */
struct iomap_ops {
/*
@@ -502,6 +503,11 @@ struct iomap_dio_ops {
*/
#define IOMAP_DIO_PARTIAL (1 << 2)
+/*
+ * Use software-based torn-write protection.
+ */
+#define IOMAP_DIO_ATOMIC_SW (1 << 3)
+
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before);