author | Christoph Hellwig <hch@lst.de> | 2025-07-10 15:33:28 +0200
---|---|---
committer | Christian Brauner <brauner@kernel.org> | 2025-07-14 10:51:31 +0200
commit | fb7399cf2d0b33825b8039f95c45395c7deba25c |
tree | 849825cc5c90ac56c4a163db7eac3752e92d7f12 /fs/xfs/xfs_aops.c |
parent | 40368a6acb95635824f6a45ec1de6233977309f3 |
iomap: refactor the writeback interface
Replace ->map_blocks with a new ->writeback_range, which differs in the
following ways:
- it must also queue up the I/O for writeback, that is, it calls into the
  slightly refactored and extended-in-scope iomap_add_to_ioend for each
  region
- it can handle only a part of the requested region, that is, the retry
  loop for partial mappings moves to the caller
- it handles cleanup on failures as well, and thus also replaces the
  ->discard_folio method that was only implemented by XFS
This will allow the iomap writeback code to also be used by file systems
that are not block based, such as fuse; a sketch of the new contract is
shown below.
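
To make the contract concrete, here is a minimal sketch of a
->writeback_range implementation, modeled directly on the
xfs_writeback_range() added in the diff below.  The myfs_* names are
hypothetical placeholders for a filesystem's own mapping and cleanup
logic, not part of this patch; only iomap_add_to_ioend() and the ops
struct fields come from the new interface:

    /*
     * Hypothetical ->writeback_range sketch; myfs_map_blocks() and
     * myfs_discard_folio() stand in for filesystem-specific logic.
     */
    static ssize_t
    myfs_writeback_range(
            struct iomap_writepage_ctx *wpc,
            struct folio            *folio,
            u64                     offset,
            unsigned int            len,
            u64                     end_pos)
    {
            ssize_t                 ret;

            /* Map some prefix of [offset, offset + len) into wpc->iomap. */
            ret = myfs_map_blocks(wpc, offset, len);
            if (!ret)
                    /* Queue the mapped range; may cover less than len. */
                    ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
            if (ret < 0)
                    /* Failure cleanup replaces the old ->discard_folio. */
                    myfs_discard_folio(folio, offset);
            /* The caller retries with whatever part of the range remains. */
            return ret;
    }

    static const struct iomap_writeback_ops myfs_writeback_ops = {
            .writeback_range        = myfs_writeback_range,
            .submit_ioend           = myfs_submit_ioend,
    };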
Co-developed-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/20250710133343.399917-5-hch@lst.de
Acked-by: Damien Le Moal <dlemoal@kernel.org> # zonefs
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r-- | fs/xfs/xfs_aops.c | 128 |
1 file changed, 82 insertions(+), 46 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 65485a52df3b..f6d44ab78442 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -234,6 +234,47 @@ xfs_end_bio(
 }
 
 /*
+ * We cannot cancel the ioend directly on error. We may have already set other
+ * pages under writeback and hence we have to run I/O completion to mark the
+ * error state of the pages under writeback appropriately.
+ *
+ * If the folio has delalloc blocks on it, the caller is asking us to punch them
+ * out. If we don't, we can leave a stale delalloc mapping covered by a clean
+ * page that needs to be dirtied again before the delalloc mapping can be
+ * converted. This stale delalloc mapping can trip up a later direct I/O read
+ * operation on the same region.
+ *
+ * We prevent this by truncating away the delalloc regions on the folio. Because
+ * they are delalloc, we can do this without needing a transaction. Indeed - if
+ * we get ENOSPC errors, we have to be able to do this truncation without a
+ * transaction as there is no space left for block reservation (typically why
+ * we see a ENOSPC in writeback).
+ */
+static void
+xfs_discard_folio(
+	struct folio		*folio,
+	loff_t			pos)
+{
+	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (xfs_is_shutdown(mp))
+		return;
+
+	xfs_alert_ratelimited(mp,
+		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
+			folio, ip->i_ino, pos);
+
+	/*
+	 * The end of the punch range is always the offset of the first
+	 * byte of the next folio. Hence the end offset is only dependent on the
+	 * folio itself and not the start offset that is passed in.
+	 */
+	xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
+			folio_pos(folio) + folio_size(folio), NULL);
+}
+
+/*
  * Fast revalidation of the cached writeback mapping. Return true if the current
  * mapping is valid, false otherwise.
  */
@@ -278,13 +319,12 @@ xfs_imap_valid(
 static int
 xfs_map_blocks(
 	struct iomap_writepage_ctx *wpc,
-	struct inode		*inode,
 	loff_t			offset,
 	unsigned int		len)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_inode	*ip = XFS_I(wpc->inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	ssize_t			count = i_blocksize(inode);
+	ssize_t			count = i_blocksize(wpc->inode);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
 	xfs_fileoff_t		cow_fsb;
@@ -436,6 +476,24 @@ allocate_blocks:
 	return 0;
 }
 
+static ssize_t
+xfs_writeback_range(
+	struct iomap_writepage_ctx *wpc,
+	struct folio		*folio,
+	u64			offset,
+	unsigned int		len,
+	u64			end_pos)
+{
+	ssize_t			ret;
+
+	ret = xfs_map_blocks(wpc, offset, len);
+	if (!ret)
+		ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
+	if (ret < 0)
+		xfs_discard_folio(folio, offset);
+	return ret;
+}
+
 static bool
 xfs_ioend_needs_wq_completion(
 	struct iomap_ioend	*ioend)
@@ -488,47 +546,9 @@ xfs_submit_ioend(
 	return 0;
 }
 
-/*
- * If the folio has delalloc blocks on it, the caller is asking us to punch them
- * out. If we don't, we can leave a stale delalloc mapping covered by a clean
- * page that needs to be dirtied again before the delalloc mapping can be
- * converted. This stale delalloc mapping can trip up a later direct I/O read
- * operation on the same region.
- *
- * We prevent this by truncating away the delalloc regions on the folio. Because
- * they are delalloc, we can do this without needing a transaction. Indeed - if
- * we get ENOSPC errors, we have to be able to do this truncation without a
- * transaction as there is no space left for block reservation (typically why
- * we see a ENOSPC in writeback).
- */
-static void
-xfs_discard_folio(
-	struct folio		*folio,
-	loff_t			pos)
-{
-	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (xfs_is_shutdown(mp))
-		return;
-
-	xfs_alert_ratelimited(mp,
-		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
-			folio, ip->i_ino, pos);
-
-	/*
-	 * The end of the punch range is always the offset of the first
-	 * byte of the next folio. Hence the end offset is only dependent on the
-	 * folio itself and not the start offset that is passed in.
-	 */
-	xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
-			folio_pos(folio) + folio_size(folio), NULL);
-}
-
 static const struct iomap_writeback_ops xfs_writeback_ops = {
-	.map_blocks		= xfs_map_blocks,
+	.writeback_range	= xfs_writeback_range,
 	.submit_ioend		= xfs_submit_ioend,
-	.discard_folio		= xfs_discard_folio,
 };
 
 struct xfs_zoned_writepage_ctx {
@@ -545,11 +565,10 @@ XFS_ZWPC(struct iomap_writepage_ctx *ctx)
 static int
 xfs_zoned_map_blocks(
 	struct iomap_writepage_ctx *wpc,
-	struct inode		*inode,
 	loff_t			offset,
 	unsigned int		len)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_inode	*ip = XFS_I(wpc->inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + len);
@@ -608,6 +627,24 @@ xfs_zoned_map_blocks(
 	return 0;
 }
 
+static ssize_t
+xfs_zoned_writeback_range(
+	struct iomap_writepage_ctx *wpc,
+	struct folio		*folio,
+	u64			offset,
+	unsigned int		len,
+	u64			end_pos)
+{
+	ssize_t			ret;
+
+	ret = xfs_zoned_map_blocks(wpc, offset, len);
+	if (!ret)
+		ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
+	if (ret < 0)
+		xfs_discard_folio(folio, offset);
+	return ret;
+}
+
 static int
 xfs_zoned_submit_ioend(
 	struct iomap_writepage_ctx *wpc,
@@ -621,9 +658,8 @@ xfs_zoned_submit_ioend(
 }
 
 static const struct iomap_writeback_ops xfs_zoned_writeback_ops = {
-	.map_blocks		= xfs_zoned_map_blocks,
+	.writeback_range	= xfs_zoned_writeback_range,
 	.submit_ioend		= xfs_zoned_submit_ioend,
-	.discard_folio		= xfs_discard_folio,
 };
 
 STATIC int