diff options
| author | Ingo Molnar <mingo@kernel.org> | 2025-03-08 00:54:06 +0100 | 
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2025-03-08 00:54:06 +0100 | 
| commit | f23ecef20af6fbd489e0362d33cdf8d9429fa901 (patch) | |
| tree | 713f06d8335b7c3388bbfbc46cb6d2a568951252 /fs/xfs/xfs_aops.c | |
| parent | c929d08df8bee855528b9d15b853c892c54e1eee (diff) | |
| parent | 85b2b9c16d053364e2004883140538e73b333cdb (diff) | |
Merge branch 'locking/urgent' into locking/core, to pick up locking fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
| -rw-r--r-- | fs/xfs/xfs_aops.c | 41 | 
1 file changed, 37 insertions, 4 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 67877c36ed11..6d9965b546cb 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@  #include "xfs_reflink.h"  #include "xfs_errortag.h"  #include "xfs_error.h" +#include "xfs_icache.h"  struct xfs_writepage_ctx {  	struct iomap_writepage_ctx ctx; @@ -528,12 +529,44 @@ xfs_vm_readahead(  }  static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate(  	struct swap_info_struct		*sis,  	struct file			*swap_file,  	sector_t			*span)  { -	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; +	struct xfs_inode		*ip = XFS_I(file_inode(swap_file)); + +	/* +	 * Swap file activation can race against concurrent shared extent +	 * removal in files that have been cloned.  If this happens, +	 * iomap_swapfile_iter() can fail because it encountered a shared +	 * extent even though an operation is in progress to remove those +	 * shared extents. +	 * +	 * This race becomes problematic when we defer extent removal +	 * operations beyond the end of a syscall (i.e. use async background +	 * processing algorithms).  Users think the extents are no longer +	 * shared, but iomap_swapfile_iter() still sees them as shared +	 * because the refcountbt entries for the extents being removed have +	 * not yet been updated.  Hence the swapon call fails unexpectedly. +	 * +	 * The race condition is currently most obvious from the unlink() +	 * operation as extent removal is deferred until after the last +	 * reference to the inode goes away.  We then process the extent +	 * removal asynchronously, hence triggers the "syscall completed but +	 * work not done" condition mentioned above.  To close this race +	 * window, we need to flush any pending inodegc operations to ensure +	 * they have updated the refcountbt records before we try to map the +	 * swapfile. +	 */ +	xfs_inodegc_flush(ip->i_mount); + +	/* +	 * Direct the swap code to the correct block device when this file +	 * sits on the RT device. 
+	 */ +	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; +  	return iomap_swapfile_activate(sis, swap_file, span,  			&xfs_read_iomap_ops);  } @@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = {  	.migrate_folio		= filemap_migrate_folio,  	.is_partially_uptodate  = iomap_is_partially_uptodate,  	.error_remove_folio	= generic_error_remove_folio, -	.swap_activate		= xfs_iomap_swapfile_activate, +	.swap_activate		= xfs_vm_swap_activate,  };  const struct address_space_operations xfs_dax_aops = {  	.writepages		= xfs_dax_writepages,  	.dirty_folio		= noop_dirty_folio, -	.swap_activate		= xfs_iomap_swapfile_activate, +	.swap_activate		= xfs_vm_swap_activate,  };  | 
