diff options
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- | fs/xfs/xfs_reflink.c | 240 |
1 files changed, 164 insertions, 76 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3246815c24d6..cdbd342a5249 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -49,8 +49,6 @@ #include "xfs_alloc.h" #include "xfs_quota_defs.h" #include "xfs_quota.h" -#include "xfs_btree.h" -#include "xfs_bmap_btree.h" #include "xfs_reflink.h" #include "xfs_iomap.h" #include "xfs_rmap_btree.h" @@ -273,7 +271,7 @@ xfs_reflink_reserve_cow( struct xfs_bmbt_irec got; int error = 0; bool eof = false, trimmed; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; /* * Search the COW fork extent list first. This serves two purposes: @@ -284,7 +282,7 @@ xfs_reflink_reserve_cow( * tree. */ - if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) + if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got)) eof = true; if (!eof && got.br_startoff <= imap->br_startoff) { trace_xfs_reflink_cow_found(ip, imap); @@ -312,7 +310,7 @@ xfs_reflink_reserve_cow( return error; error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - imap->br_blockcount, 0, &got, &idx, eof); + imap->br_blockcount, 0, &got, &icur, eof); if (error == -ENOSPC || error == -EDQUOT) trace_xfs_reflink_cow_enospc(ip, imap); if (error) @@ -353,29 +351,22 @@ xfs_reflink_convert_cow( xfs_off_t offset, xfs_off_t count) { - struct xfs_bmbt_irec got; - struct xfs_defer_ops dfops; struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); - xfs_extnum_t idx; - bool found; - int error = 0; + xfs_filblks_t count_fsb = end_fsb - offset_fsb; + struct xfs_bmbt_irec imap; + struct xfs_defer_ops dfops; + xfs_fsblock_t first_block = NULLFSBLOCK; + int nimaps = 1, error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(count != 0); - /* Convert all the extents to real from unwritten. */ - for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); - found && got.br_startoff < end_fsb; - found = xfs_iext_get_extent(ifp, ++idx, &got)) { - error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, - end_fsb - offset_fsb, &dfops); - if (error) - break; - } - - /* Finish up. */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb, + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT | + XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps, + &dfops); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -399,17 +390,17 @@ xfs_reflink_allocate_cow( bool trimmed; xfs_filblks_t resaligned; xfs_extlen_t resblks = 0; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; retry: ASSERT(xfs_is_reflink_inode(ip)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* * Even if the extent is not shared we might have a preallocation for * it in the COW fork. If so use it. */ - if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && got.br_startoff <= offset_fsb) { *shared = true; @@ -463,6 +454,8 @@ retry: if (error) goto out_bmap_cancel; + xfs_inode_set_cowblocks_tag(ip); + /* Finish up. */ error = xfs_defer_finish(&tp, &dfops); if (error) @@ -471,6 +464,13 @@ retry: error = xfs_trans_commit(tp); if (error) return error; + + /* + * Allocation succeeded but the requested range was not even partially + * satisfied? Bail out! + */ + if (nimaps == 0) + return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, &dfops); @@ -496,13 +496,14 @@ xfs_reflink_find_cow_mapping( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); xfs_fileoff_t offset_fsb; struct xfs_bmbt_irec got; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_is_reflink_inode(ip)) + return false; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return false; if (got.br_startoff > offset_fsb) return false; @@ -524,18 +525,18 @@ xfs_reflink_trim_irec_to_next_cow( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; if (!xfs_is_reflink_inode(ip)) return; /* Find the extent in the CoW fork. */ - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) + if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return; /* This is the extent before; try sliding up one. */ if (got.br_startoff < offset_fsb) { - if (!xfs_iext_get_extent(ifp, idx + 1, &got)) + if (!xfs_iext_next_extent(ifp, &icur, &got)) return; } @@ -562,24 +563,32 @@ xfs_reflink_cancel_cow_blocks( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; int error = 0; if (!xfs_is_reflink_inode(ip)) return 0; - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) + if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) return 0; - while (got.br_startoff < end_fsb) { + /* Walk backwards until we're out of the I/O range... */ + while (got.br_startoff + got.br_blockcount > offset_fsb) { del = got; xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + + /* Extent delete may have bumped ext forward */ + if (!del.br_blockcount) { + xfs_iext_prev(ifp, &icur); + goto next_extent; + } + trace_xfs_reflink_cancel_cow(ip, &del); if (isnullstartblock(del.br_startblock)) { error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, - &idx, &got, &del); + &icur, &got, &del); if (error) break; } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { @@ -597,10 +606,6 @@ xfs_reflink_cancel_cow_blocks( del.br_startblock, del.br_blockcount, NULL); - /* Update quota accounting */ - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)del.br_blockcount); - /* Roll the transaction */ xfs_defer_ijoin(&dfops, ip); error = xfs_defer_finish(tpp, &dfops); @@ -610,10 +615,20 @@ xfs_reflink_cancel_cow_blocks( } /* Remove the mapping from the CoW fork. */ - xfs_bmap_del_extent_cow(ip, &idx, &got, &del); - } + xfs_bmap_del_extent_cow(ip, &icur, &got, &del); - if (!xfs_iext_get_extent(ifp, ++idx, &got)) + /* Remove the quota reservation */ + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -(long)del.br_blockcount, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + break; + } else { + /* Didn't do anything, push cursor back. */ + xfs_iext_prev(ifp, &icur); + } +next_extent: + if (!xfs_iext_get_extent(ifp, &icur, &got)) break; } @@ -653,7 +668,7 @@ xfs_reflink_cancel_cow_range( /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - 0, 0, 0, &tp); + 0, 0, XFS_TRANS_NOFS, &tp); if (error) goto out; @@ -698,7 +713,7 @@ xfs_reflink_end_cow( int error; unsigned int resblks; xfs_filblks_t rlen; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; trace_xfs_reflink_end_cow(ip, offset, count); @@ -726,30 +741,29 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, 0, &tp); + resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); if (error) goto out; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* If there is a hole at end_fsb - 1 go to the previous extent */ - if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || - got.br_startoff > end_fsb) { - ASSERT(idx > 0); - xfs_iext_get_extent(ifp, --idx, &got); - } + /* + * In case of racing, overlapping AIO writes no COW extents might be + * left by the time I/O completes for the loser of the race. In that + * case we are done. + */ + if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) + goto out_cancel; /* Walk backwards until we're out of the I/O range... */ while (got.br_startoff + got.br_blockcount > offset_fsb) { del = got; xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); - /* Extent delete may have bumped idx forward */ - if (!del.br_blockcount) { - idx--; - goto next_extent; - } + /* Extent delete may have bumped ext forward */ + if (!del.br_blockcount) + goto prev_extent; ASSERT(!isnullstartblock(got.br_startblock)); @@ -758,10 +772,8 @@ xfs_reflink_end_cow( * speculatively preallocated CoW extents that have been * allocated but have not yet been involved in a write. */ - if (got.br_state == XFS_EXT_UNWRITTEN) { - idx--; - goto next_extent; - } + if (got.br_state == XFS_EXT_UNWRITTEN) + goto prev_extent; /* Unmap the old blocks in the data fork. */ xfs_defer_init(&dfops, &firstfsb); @@ -789,15 +801,22 @@ xfs_reflink_end_cow( if (error) goto out_defer; + /* Charge this new data fork mapping to the on-disk quota. */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, + (long)del.br_blockcount); + /* Remove the mapping from the CoW fork. */ - xfs_bmap_del_extent_cow(ip, &idx, &got, &del); + xfs_bmap_del_extent_cow(ip, &icur, &got, &del); xfs_defer_ijoin(&dfops, ip); error = xfs_defer_finish(&tp, &dfops); if (error) goto out_defer; -next_extent: - if (!xfs_iext_get_extent(ifp, idx, &got)) + if (!xfs_iext_get_extent(ifp, &icur, &got)) + break; + continue; +prev_extent: + if (!xfs_iext_prev_extent(ifp, &icur, &got)) break; } @@ -809,6 +828,7 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); +out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: @@ -937,7 +957,7 @@ xfs_reflink_set_inode_flag( if (src->i_ino == dest->i_ino) xfs_ilock(src, XFS_ILOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); if (!xfs_is_reflink_inode(src)) { trace_xfs_reflink_set_inode_flag(src); @@ -1040,7 +1060,7 @@ xfs_reflink_ag_has_free_space( return 0; pag = xfs_perag_get(mp, agno); - if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) || + if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) || xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA)) error = -ENOSPC; xfs_perag_put(pag); @@ -1195,13 +1215,16 @@ xfs_reflink_remap_blocks( /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ while (len) { + uint lock_mode; + trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, dest, destoff); + /* Read extent from the source file */ nimaps = 1; - xfs_ilock(src, XFS_ILOCK_EXCL); + lock_mode = xfs_ilock_data_map_shared(src); error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); - xfs_iunlock(src, XFS_ILOCK_EXCL); + xfs_iunlock(src, lock_mode); if (error) goto err; ASSERT(nimaps == 1); @@ -1238,6 +1261,50 @@ err: } /* + * Grab the exclusive iolock for a data copy from src to dest, making + * sure to abide vfs locking order (lowest pointer value goes first) and + * breaking the pnfs layout leases on dest before proceeding. The loop + * is needed because we cannot call the blocking break_layout() with the + * src iolock held, and therefore have to back out both locks. + */ +static int +xfs_iolock_two_inodes_and_break_layout( + struct inode *src, + struct inode *dest) +{ + int error; + +retry: + if (src < dest) { + inode_lock_shared(src); + inode_lock_nested(dest, I_MUTEX_NONDIR2); + } else { + /* src >= dest */ + inode_lock(dest); + } + + error = break_layout(dest, false); + if (error == -EWOULDBLOCK) { + inode_unlock(dest); + if (src < dest) + inode_unlock_shared(src); + error = break_layout(dest, true); + if (error) + return error; + goto retry; + } + if (error) { + inode_unlock(dest); + if (src < dest) + inode_unlock_shared(src); + return error; + } + if (src > dest) + inode_lock_shared_nested(src, I_MUTEX_NONDIR2); + return 0; +} + +/* * Link a range of blocks from one file to another. */ int @@ -1267,11 +1334,14 @@ xfs_reflink_remap_range( return -EIO; /* Lock both files against IO */ - lock_two_nondirectories(inode_in, inode_out); + ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); + if (ret) + return ret; if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, + XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; @@ -1288,8 +1358,24 @@ xfs_reflink_remap_range( if (ret <= 0) goto out_unlock; + /* Attach dquots to dest inode before changing block map */ + ret = xfs_qm_dqattach(dest, 0); + if (ret) + goto out_unlock; + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* + * Clear out post-eof preallocations because we don't have page cache + * backing the delayed allocations and they'll never get freed on + * their own. + */ + if (xfs_can_free_eofblocks(dest, true)) { + ret = xfs_free_eofblocks(dest); + if (ret) + goto out_unlock; + } + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) @@ -1323,10 +1409,12 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); if (!same_inode) - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - unlock_two_nondirectories(inode_in, inode_out); + inode_unlock_shared(inode_in); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; @@ -1426,7 +1514,7 @@ xfs_reflink_inode_has_shared_extents( xfs_extlen_t aglen; xfs_agblock_t rbno; xfs_extlen_t rlen; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; bool found; int error; @@ -1438,7 +1526,7 @@ xfs_reflink_inode_has_shared_extents( } *has_shared = false; - found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got); + found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got); while (found) { if (isnullstartblock(got.br_startblock) || got.br_state != XFS_EXT_NORM) @@ -1457,7 +1545,7 @@ xfs_reflink_inode_has_shared_extents( return 0; } next: - found = xfs_iext_get_extent(ifp, ++idx, &got); + found = xfs_iext_next_extent(ifp, &icur, &got); } return 0; |