From f74681ba2006434be195402e0b15fc5763cddd7e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 29 Jun 2020 14:44:36 -0700 Subject: xfs: preserve rmapbt swapext block reservation from freed blocks The rmapbt extent swap algorithm remaps individual extents between the source inode and the target to trigger reverse mapping metadata updates. If either inode straddles a format or other bmap allocation boundary, the individual unmap and map cycles can trigger repeated bmap block allocations and frees as the extent count bounces back and forth across the boundary. While net block usage is bound across the swap operation, this behavior can prematurely exhaust the transaction block reservation because it continuously drains as the transaction rolls. Each allocation accounts against the reservation and each free returns to global free space on transaction roll. The previous workaround to this problem attempted to detect this boundary condition and provide surplus block reservation to acommodate it. This is insufficient because more remaps can occur than implied by the extent counts; if start offset boundaries are not aligned between the two inodes, for example. To address this problem more generically and dynamically, add a transaction accounting mode that returns freed blocks to the transaction reservation instead of the superblock counters on transaction roll and use it when the rmapbt based algorithm is active. This allows the chain of remap transactions to preserve the block reservation based own its own frees and prevent premature exhaustion regardless of the remap pattern. Note that this is only safe for superblocks with lazy sb accounting, but the latter is required for v5 supers and the rmap feature depends on v5. Fixes: b3fed434822d0 ("xfs: account format bouncing into rmapbt swapext tx reservation") Root-caused-by: Darrick J. Wong Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_trans.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 3c94e5ff4316..0ad72a83edac 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -107,7 +107,8 @@ xfs_trans_dup( ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE) | - (tp->t_flags & XFS_TRANS_NO_WRITECOUNT); + (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) | + (tp->t_flags & XFS_TRANS_RES_FDBLKS); /* We gave our writer reference to the new transaction */ tp->t_flags |= XFS_TRANS_NO_WRITECOUNT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); @@ -272,6 +273,8 @@ xfs_trans_alloc( */ WARN_ON(resp->tr_logres > 0 && mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || + xfs_sb_version_haslazysbcount(&mp->m_sb)); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; @@ -365,6 +368,20 @@ xfs_trans_mod_sb( tp->t_blk_res_used += (uint)-delta; if (tp->t_blk_res_used > tp->t_blk_res) xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) { + int64_t blkres_delta; + + /* + * Return freed blocks directly to the reservation + * instead of the global pool, being careful not to + * overflow the trans counter. This is used to preserve + * reservation across chains of transaction rolls that + * repeatedly free and allocate blocks. + */ + blkres_delta = min_t(int64_t, delta, + UINT_MAX - tp->t_blk_res); + tp->t_blk_res += blkres_delta; + delta -= blkres_delta; } tp->t_fdblocks_delta += delta; if (xfs_sb_version_haslazysbcount(&mp->m_sb)) -- cgit From 32a2b11f467642ea700bc0b01f4693e52ec0fabd Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 22 Jul 2020 09:23:10 -0700 Subject: xfs: Remove kmem_zone_zalloc() usage Use kmem_cache_zalloc() directly. With the exception of xlog_ticket_alloc() which will be dealt on the next patch for readability. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_trans.c') diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 0ad72a83edac..ed72867b1a19 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -90,7 +90,7 @@ xfs_trans_dup( trace_xfs_trans_dup(tp, _RET_IP_); - ntp = kmem_zone_zalloc(xfs_trans_zone, 0); + ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. @@ -263,7 +263,7 @@ xfs_trans_alloc( * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ - tp = kmem_zone_zalloc(xfs_trans_zone, 0); + tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); -- cgit