summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r--fs/xfs/xfs_reflink.c240
1 files changed, 164 insertions, 76 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3246815c24d6..cdbd342a5249 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
#include "xfs_alloc.h"
#include "xfs_quota_defs.h"
#include "xfs_quota.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include "xfs_iomap.h"
#include "xfs_rmap_btree.h"
@@ -273,7 +271,7 @@ xfs_reflink_reserve_cow(
struct xfs_bmbt_irec got;
int error = 0;
bool eof = false, trimmed;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
/*
* Search the COW fork extent list first. This serves two purposes:
@@ -284,7 +282,7 @@ xfs_reflink_reserve_cow(
* tree.
*/
- if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+ if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got))
eof = true;
if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap);
@@ -312,7 +310,7 @@ xfs_reflink_reserve_cow(
return error;
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
- imap->br_blockcount, 0, &got, &idx, eof);
+ imap->br_blockcount, 0, &got, &icur, eof);
if (error == -ENOSPC || error == -EDQUOT)
trace_xfs_reflink_cow_enospc(ip, imap);
if (error)
@@ -353,29 +351,22 @@ xfs_reflink_convert_cow(
xfs_off_t offset,
xfs_off_t count)
{
- struct xfs_bmbt_irec got;
- struct xfs_defer_ops dfops;
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
- xfs_extnum_t idx;
- bool found;
- int error = 0;
+ xfs_filblks_t count_fsb = end_fsb - offset_fsb;
+ struct xfs_bmbt_irec imap;
+ struct xfs_defer_ops dfops;
+ xfs_fsblock_t first_block = NULLFSBLOCK;
+ int nimaps = 1, error = 0;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(count != 0);
- /* Convert all the extents to real from unwritten. */
- for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
- found && got.br_startoff < end_fsb;
- found = xfs_iext_get_extent(ifp, ++idx, &got)) {
- error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
- end_fsb - offset_fsb, &dfops);
- if (error)
- break;
- }
-
- /* Finish up. */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
+ XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
+ &dfops);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -399,17 +390,17 @@ xfs_reflink_allocate_cow(
bool trimmed;
xfs_filblks_t resaligned;
xfs_extlen_t resblks = 0;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
retry:
ASSERT(xfs_is_reflink_inode(ip));
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Even if the extent is not shared we might have a preallocation for
* it in the COW fork. If so use it.
*/
- if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+ if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&
got.br_startoff <= offset_fsb) {
*shared = true;
@@ -463,6 +454,8 @@ retry:
if (error)
goto out_bmap_cancel;
+ xfs_inode_set_cowblocks_tag(ip);
+
/* Finish up. */
error = xfs_defer_finish(&tp, &dfops);
if (error)
@@ -471,6 +464,13 @@ retry:
error = xfs_trans_commit(tp);
if (error)
return error;
+
+ /*
+ * Allocation succeeded but the requested range was not even partially
+ * satisfied? Bail out!
+ */
+ if (nimaps == 0)
+ return -ENOSPC;
convert:
return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
&dfops);
@@ -496,13 +496,14 @@ xfs_reflink_find_cow_mapping(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
xfs_fileoff_t offset_fsb;
struct xfs_bmbt_irec got;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
- ASSERT(xfs_is_reflink_inode(ip));
+ if (!xfs_is_reflink_inode(ip))
+ return false;
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
- if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
return false;
if (got.br_startoff > offset_fsb)
return false;
@@ -524,18 +525,18 @@ xfs_reflink_trim_irec_to_next_cow(
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
if (!xfs_is_reflink_inode(ip))
return;
/* Find the extent in the CoW fork. */
- if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+ if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
return;
/* This is the extent before; try sliding up one. */
if (got.br_startoff < offset_fsb) {
- if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+ if (!xfs_iext_next_extent(ifp, &icur, &got))
return;
}
@@ -562,24 +563,32 @@ xfs_reflink_cancel_cow_blocks(
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, del;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
int error = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
- if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+ if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
return 0;
- while (got.br_startoff < end_fsb) {
+ /* Walk backwards until we're out of the I/O range... */
+ while (got.br_startoff + got.br_blockcount > offset_fsb) {
del = got;
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+ /* Extent delete may have bumped ext forward */
+ if (!del.br_blockcount) {
+ xfs_iext_prev(ifp, &icur);
+ goto next_extent;
+ }
+
trace_xfs_reflink_cancel_cow(ip, &del);
if (isnullstartblock(del.br_startblock)) {
error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
- &idx, &got, &del);
+ &icur, &got, &del);
if (error)
break;
} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
@@ -597,10 +606,6 @@ xfs_reflink_cancel_cow_blocks(
del.br_startblock, del.br_blockcount,
NULL);
- /* Update quota accounting */
- xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
- -(long)del.br_blockcount);
-
/* Roll the transaction */
xfs_defer_ijoin(&dfops, ip);
error = xfs_defer_finish(tpp, &dfops);
@@ -610,10 +615,20 @@ xfs_reflink_cancel_cow_blocks(
}
/* Remove the mapping from the CoW fork. */
- xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
- }
+ xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
- if (!xfs_iext_get_extent(ifp, ++idx, &got))
+ /* Remove the quota reservation */
+ error = xfs_trans_reserve_quota_nblks(NULL, ip,
+ -(long)del.br_blockcount, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ break;
+ } else {
+ /* Didn't do anything, push cursor back. */
+ xfs_iext_prev(ifp, &icur);
+ }
+next_extent:
+ if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
}
@@ -653,7 +668,7 @@ xfs_reflink_cancel_cow_range(
/* Start a rolling transaction to remove the mappings */
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- 0, 0, 0, &tp);
+ 0, 0, XFS_TRANS_NOFS, &tp);
if (error)
goto out;
@@ -698,7 +713,7 @@ xfs_reflink_end_cow(
int error;
unsigned int resblks;
xfs_filblks_t rlen;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
trace_xfs_reflink_end_cow(ip, offset, count);
@@ -726,30 +741,29 @@ xfs_reflink_end_cow(
(unsigned int)(end_fsb - offset_fsb),
XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- resblks, 0, 0, &tp);
+ resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
if (error)
goto out;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- /* If there is a hole at end_fsb - 1 go to the previous extent */
- if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
- got.br_startoff > end_fsb) {
- ASSERT(idx > 0);
- xfs_iext_get_extent(ifp, --idx, &got);
- }
+ /*
+ * In case of racing, overlapping AIO writes no COW extents might be
+ * left by the time I/O completes for the loser of the race. In that
+ * case we are done.
+ */
+ if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+ goto out_cancel;
/* Walk backwards until we're out of the I/O range... */
while (got.br_startoff + got.br_blockcount > offset_fsb) {
del = got;
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
- /* Extent delete may have bumped idx forward */
- if (!del.br_blockcount) {
- idx--;
- goto next_extent;
- }
+ /* Extent delete may have bumped ext forward */
+ if (!del.br_blockcount)
+ goto prev_extent;
ASSERT(!isnullstartblock(got.br_startblock));
@@ -758,10 +772,8 @@ xfs_reflink_end_cow(
* speculatively preallocated CoW extents that have been
* allocated but have not yet been involved in a write.
*/
- if (got.br_state == XFS_EXT_UNWRITTEN) {
- idx--;
- goto next_extent;
- }
+ if (got.br_state == XFS_EXT_UNWRITTEN)
+ goto prev_extent;
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
@@ -789,15 +801,22 @@ xfs_reflink_end_cow(
if (error)
goto out_defer;
+ /* Charge this new data fork mapping to the on-disk quota. */
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
+ (long)del.br_blockcount);
+
/* Remove the mapping from the CoW fork. */
- xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+ xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
xfs_defer_ijoin(&dfops, ip);
error = xfs_defer_finish(&tp, &dfops);
if (error)
goto out_defer;
-next_extent:
- if (!xfs_iext_get_extent(ifp, idx, &got))
+ if (!xfs_iext_get_extent(ifp, &icur, &got))
+ break;
+ continue;
+prev_extent:
+ if (!xfs_iext_prev_extent(ifp, &icur, &got))
break;
}
@@ -809,6 +828,7 @@ next_extent:
out_defer:
xfs_defer_cancel(&dfops);
+out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
@@ -937,7 +957,7 @@ xfs_reflink_set_inode_flag(
if (src->i_ino == dest->i_ino)
xfs_ilock(src, XFS_ILOCK_EXCL);
else
- xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL);
if (!xfs_is_reflink_inode(src)) {
trace_xfs_reflink_set_inode_flag(src);
@@ -1040,7 +1060,7 @@ xfs_reflink_ag_has_free_space(
return 0;
pag = xfs_perag_get(mp, agno);
- if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) ||
+ if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) ||
xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
error = -ENOSPC;
xfs_perag_put(pag);
@@ -1195,13 +1215,16 @@ xfs_reflink_remap_blocks(
/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
while (len) {
+ uint lock_mode;
+
trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
dest, destoff);
+
/* Read extent from the source file */
nimaps = 1;
- xfs_ilock(src, XFS_ILOCK_EXCL);
+ lock_mode = xfs_ilock_data_map_shared(src);
error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
- xfs_iunlock(src, XFS_ILOCK_EXCL);
+ xfs_iunlock(src, lock_mode);
if (error)
goto err;
ASSERT(nimaps == 1);
@@ -1238,6 +1261,50 @@ err:
}
/*
+ * Grab the exclusive iolock for a data copy from src to dest, making
+ * sure to abide vfs locking order (lowest pointer value goes first) and
+ * breaking the pnfs layout leases on dest before proceeding. The loop
+ * is needed because we cannot call the blocking break_layout() with the
+ * src iolock held, and therefore have to back out both locks.
+ */
+static int
+xfs_iolock_two_inodes_and_break_layout(
+ struct inode *src,
+ struct inode *dest)
+{
+ int error;
+
+retry:
+ if (src < dest) {
+ inode_lock_shared(src);
+ inode_lock_nested(dest, I_MUTEX_NONDIR2);
+ } else {
+ /* src >= dest */
+ inode_lock(dest);
+ }
+
+ error = break_layout(dest, false);
+ if (error == -EWOULDBLOCK) {
+ inode_unlock(dest);
+ if (src < dest)
+ inode_unlock_shared(src);
+ error = break_layout(dest, true);
+ if (error)
+ return error;
+ goto retry;
+ }
+ if (error) {
+ inode_unlock(dest);
+ if (src < dest)
+ inode_unlock_shared(src);
+ return error;
+ }
+ if (src > dest)
+ inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+ return 0;
+}
+
+/*
* Link a range of blocks from one file to another.
*/
int
@@ -1267,11 +1334,14 @@ xfs_reflink_remap_range(
return -EIO;
/* Lock both files against IO */
- lock_two_nondirectories(inode_in, inode_out);
+ ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
+ if (ret)
+ return ret;
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else
- xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
+ xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+ XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
@@ -1288,8 +1358,24 @@ xfs_reflink_remap_range(
if (ret <= 0)
goto out_unlock;
+ /* Attach dquots to dest inode before changing block map */
+ ret = xfs_qm_dqattach(dest, 0);
+ if (ret)
+ goto out_unlock;
+
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+ /*
+ * Clear out post-eof preallocations because we don't have page cache
+ * backing the delayed allocations and they'll never get freed on
+ * their own.
+ */
+ if (xfs_can_free_eofblocks(dest, true)) {
+ ret = xfs_free_eofblocks(dest);
+ if (ret)
+ goto out_unlock;
+ }
+
/* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
@@ -1323,10 +1409,12 @@ xfs_reflink_remap_range(
is_dedupe);
out_unlock:
- xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+ xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+ if (!same_inode)
+ xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+ inode_unlock(inode_out);
if (!same_inode)
- xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
- unlock_two_nondirectories(inode_in, inode_out);
+ inode_unlock_shared(inode_in);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
@@ -1426,7 +1514,7 @@ xfs_reflink_inode_has_shared_extents(
xfs_extlen_t aglen;
xfs_agblock_t rbno;
xfs_extlen_t rlen;
- xfs_extnum_t idx;
+ struct xfs_iext_cursor icur;
bool found;
int error;
@@ -1438,7 +1526,7 @@ xfs_reflink_inode_has_shared_extents(
}
*has_shared = false;
- found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+ found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
while (found) {
if (isnullstartblock(got.br_startblock) ||
got.br_state != XFS_EXT_NORM)
@@ -1457,7 +1545,7 @@ xfs_reflink_inode_has_shared_extents(
return 0;
}
next:
- found = xfs_iext_get_extent(ifp, ++idx, &got);
+ found = xfs_iext_next_extent(ifp, &icur, &got);
}
return 0;