summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_bmap_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r--fs/xfs/xfs_bmap_util.c240
1 files changed, 137 insertions, 103 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 19e11d1da660..06ca11731e43 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -29,6 +29,8 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_zone_alloc.h"
/* Kernel only BMAP related definitions and functions */
@@ -41,16 +43,12 @@ xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
if (XFS_IS_REALTIME_INODE(ip))
- return XFS_FSB_TO_BB(ip->i_mount, fsb);
+ return xfs_rtb_to_daddr(ip->i_mount, fsb);
return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}
/*
* Routine to zero an extent on disk allocated to the specific inode.
- *
- * The VFS functions take a linearised filesystem block offset, so we have to
- * convert the sparse xfs fsb to the right format first.
- * VFS types are real funky, too.
*/
int
xfs_zero_extent(
@@ -58,15 +56,10 @@ xfs_zero_extent(
xfs_fsblock_t start_fsb,
xfs_off_t count_fsb)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
- xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
- sector_t block = XFS_BB_TO_FSBT(mp, sector);
-
- return blkdev_issue_zeroout(target->bt_bdev,
- block << (mp->m_super->s_blocksize_bits - 9),
- count_fsb << (mp->m_super->s_blocksize_bits - 9),
- GFP_KERNEL, 0);
+ return blkdev_issue_zeroout(xfs_inode_buftarg(ip)->bt_bdev,
+ xfs_fsb_to_db(ip, start_fsb),
+ XFS_FSB_TO_BB(ip->i_mount, count_fsb),
+ GFP_KERNEL, 0);
}
/*
@@ -111,7 +104,7 @@ xfs_bmap_count_blocks(
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
struct xfs_btree_cur *cur;
- xfs_extlen_t btblocks = 0;
+ xfs_filblks_t btblocks = 0;
int error;
*nextents = 0;
@@ -331,8 +324,7 @@ xfs_getbmap(
}
if (xfs_get_extsz_hint(ip) ||
- (ip->i_diflags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+ (ip->i_diflags & XFS_DIFLAG_PREALLOC))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
@@ -440,19 +432,20 @@ out_unlock_iolock:
* if the ranges only partially overlap them, so it is up to the caller to
* ensure that partial blocks are not passed in.
*/
-int
+void
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
+ int whichfork,
xfs_off_t start_byte,
- xfs_off_t end_byte)
+ xfs_off_t end_byte,
+ struct xfs_zone_alloc_ctx *ac)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = &ip->i_df;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
struct xfs_bmbt_irec got, del;
struct xfs_iext_cursor icur;
- int error = 0;
ASSERT(!xfs_need_iread_extents(ifp));
@@ -476,33 +469,46 @@ xfs_bmap_punch_delalloc_range(
continue;
}
- error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
- &got, &del);
- if (error || !xfs_iext_get_extent(ifp, &icur, &got))
+ if (xfs_is_zoned_inode(ip) && ac) {
+ /*
+ * In a zoned buffered write context we need to return
+ * the punched delalloc allocations to the allocation
+ * context. This allows reusing them in the following
+ * iomap iterations.
+ */
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got,
+ &del, XFS_BMAPI_REMAP);
+ ac->reserved_blocks += del.br_blockcount;
+ } else {
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got,
+ &del, 0);
+ }
+
+ if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
}
+ if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+ xfs_inode_clear_cowblocks_tag(ip);
+
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
}
/*
* Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
*/
bool
xfs_can_free_eofblocks(
- struct xfs_inode *ip,
- bool force)
+ struct xfs_inode *ip)
{
- struct xfs_bmbt_irec imap;
struct xfs_mount *mp = ip->i_mount;
+ bool found_blocks = false;
xfs_fileoff_t end_fsb;
xfs_fileoff_t last_fsb;
- int nimaps = 1;
- int error;
+ struct xfs_bmbt_irec imap;
+ struct xfs_iext_cursor icur;
/*
* Caller must either hold the exclusive io lock; or be inactivating
@@ -529,12 +535,11 @@ xfs_can_free_eofblocks(
return false;
/*
- * Do not free real preallocated or append-only files unless the file
- * has delalloc blocks and we are forced to remove them.
+ * Do not free real extents in preallocated files unless the file has
+ * delalloc blocks and we are forced to remove them.
*/
- if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
- if (!force || ip->i_delayed_blks == 0)
- return false;
+ if ((ip->i_diflags & XFS_DIFLAG_PREALLOC) && !ip->i_delayed_blks)
+ return false;
/*
* Do not try to free post-EOF blocks if EOF is beyond the end of the
@@ -542,28 +547,24 @@ xfs_can_free_eofblocks(
* forever.
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
- if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
- end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
+ if (xfs_inode_has_bigrtalloc(ip))
+ end_fsb = xfs_fileoff_roundup_rtx(mp, end_fsb);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
/*
- * Look up the mapping for the first block past EOF. If we can't find
- * it, there's nothing to free.
+ * Check if there is an post-EOF extent to free. If there are any
+ * delalloc blocks attached to the inode (data fork delalloc
+ * reservations or CoW extents of any kind), we need to free them so
+ * that inactivation doesn't fail to erase them.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
- 0);
+ if (ip->i_delayed_blks ||
+ xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
+ found_blocks = true;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (error || nimaps == 0)
- return false;
-
- /*
- * If there's a real mapping there or there are delayed allocation
- * reservations, then we have post-EOF blocks to try to free.
- */
- return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
+ return found_blocks;
}
/*
@@ -587,6 +588,22 @@ xfs_free_eofblocks(
/* Wait on dio to ensure i_size has settled. */
inode_dio_wait(VFS_I(ip));
+ /*
+ * For preallocated files only free delayed allocations.
+ *
+ * Note that this means we also leave speculative preallocations in
+ * place for preallocated files.
+ */
+ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+ if (ip->i_delayed_blks) {
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
+ round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+ LLONG_MAX, NULL);
+ }
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(xfs_is_shutdown(mp));
@@ -642,6 +659,9 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
int error;
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
+
trace_xfs_alloc_file_space(ip);
if (xfs_is_shutdown(mp))
@@ -713,41 +733,37 @@ xfs_alloc_file_space(
if (error)
break;
- error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+ error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
XFS_IEXT_ADD_NOSPLIT_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip,
- XFS_IEXT_ADD_NOSPLIT_CNT);
- if (error)
- goto error;
-
- error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
- &nimaps);
if (error)
goto error;
- ip->i_diflags |= XFS_DIFLAG_PREALLOC;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- break;
-
/*
* If the allocator cannot find a single free extent large
* enough to cover the start block of the requested range,
- * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
+ * xfs_bmapi_write will return -ENOSR.
*
* In that case we simply need to keep looping with the same
* startoffset_fsb so that one of the following allocations
* will eventually reach the requested range.
*/
- if (nimaps) {
+ error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+ allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ &nimaps);
+ if (error) {
+ if (error != -ENOSR)
+ goto error;
+ error = 0;
+ } else {
startoffset_fsb += imapp->br_blockcount;
allocatesize_fsb -= imapp->br_blockcount;
}
+
+ ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
return error;
@@ -775,10 +791,8 @@ xfs_unmap_extent(
if (error)
return error;
- error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+ error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
XFS_IEXT_PUNCH_HOLE_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
if (error)
goto out_trans_cancel;
@@ -803,14 +817,18 @@ xfs_flush_unmap_range(
xfs_off_t offset,
xfs_off_t len)
{
- struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
xfs_off_t rounding, start, end;
int error;
- rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE);
- start = round_down(offset, rounding);
- end = round_up(offset + len, rounding) - 1;
+ /*
+ * Make sure we extend the flush out to extent alignment
+ * boundaries so any extent range overlapping the start/end
+ * of the modification we are about to do is clean and idle.
+ */
+ rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
+ start = rounddown_64(offset, rounding);
+ end = roundup_64(offset + len, rounding) - 1;
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
@@ -823,7 +841,8 @@ int
xfs_free_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len)
+ xfs_off_t len,
+ struct xfs_zone_alloc_ctx *ac)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t startoffset_fsb;
@@ -839,13 +858,21 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
+ /*
+ * Now AIO and DIO has drained we flush and (if necessary) invalidate
+ * the cached range over the first operation we are about to run.
+ */
+ error = xfs_flush_unmap_range(ip, offset, len);
+ if (error)
+ return error;
+
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/* We can only free complete realtime extents. */
- if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
- startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
- endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
+ if (xfs_inode_has_bigrtalloc(ip)) {
+ startoffset_fsb = xfs_fileoff_roundup_rtx(mp, startoffset_fsb);
+ endoffset_fsb = xfs_fileoff_rounddown_rtx(mp, endoffset_fsb);
}
/*
@@ -870,7 +897,7 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
- error = xfs_zero_range(ip, offset, len, NULL);
+ error = xfs_zero_range(ip, offset, len, ac, NULL);
if (error)
return error;
@@ -893,14 +920,14 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
- struct xfs_mount *mp = ip->i_mount;
+ unsigned int rounding;
int error;
/*
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
*/
- if (xfs_can_free_eofblocks(ip, true)) {
+ if (xfs_can_free_eofblocks(ip)) {
error = xfs_free_eofblocks(ip);
if (error)
return error;
@@ -911,11 +938,13 @@ xfs_prepare_shift(
* with the full range of the operation. If we don't, a COW writeback
* completion could race with an insert, front merge with the start
* extent (after split) during the shift and corrupt the file. Start
- * with the block just prior to the start to stabilize the boundary.
+ * with the allocation unit just prior to the start to stabilize the
+ * boundary.
*/
- offset = round_down(offset, mp->m_sb.sb_blocksize);
+ rounding = xfs_inode_alloc_unitsize(ip);
+ offset = rounddown_64(offset, rounding);
if (offset)
- offset -= mp->m_sb.sb_blocksize;
+ offset -= rounding;
/*
* Writeback and invalidate cache for the remainder of the file as we're
@@ -956,7 +985,8 @@ int
xfs_collapse_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len)
+ xfs_off_t len,
+ struct xfs_zone_alloc_ctx *ac)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
@@ -969,7 +999,7 @@ xfs_collapse_file_space(
trace_xfs_collapse_file_space(ip);
- error = xfs_free_file_space(ip, offset, len);
+ error = xfs_free_file_space(ip, offset, len, ac);
if (error)
return error;
@@ -1054,10 +1084,8 @@ xfs_insert_file_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+ error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
XFS_IEXT_PUNCH_HOLE_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
if (error)
goto out_trans_cancel;
@@ -1175,7 +1203,7 @@ xfs_swap_extents_check_format(
*/
if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
if (xfs_inode_has_attr_fork(ip) &&
- XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
+ xfs_bmap_bmdr_space(tifp->if_broot) > xfs_inode_fork_boff(ip))
return -EINVAL;
if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return -EINVAL;
@@ -1184,7 +1212,7 @@ xfs_swap_extents_check_format(
/* Reciprocal target->temp btree format checks */
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
if (xfs_inode_has_attr_fork(tip) &&
- XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
+ xfs_bmap_bmdr_space(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
return -EINVAL;
if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return -EINVAL;
@@ -1283,23 +1311,17 @@ xfs_swap_extent_rmap(
trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
if (xfs_bmap_is_real_extent(&uirec)) {
- error = xfs_iext_count_may_overflow(ip,
+ error = xfs_iext_count_extend(tp, ip,
XFS_DATA_FORK,
XFS_IEXT_SWAP_RMAP_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip,
- XFS_IEXT_SWAP_RMAP_CNT);
if (error)
goto out;
}
if (xfs_bmap_is_real_extent(&irec)) {
- error = xfs_iext_count_may_overflow(tip,
+ error = xfs_iext_count_extend(tp, tip,
XFS_DATA_FORK,
XFS_IEXT_SWAP_RMAP_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip,
- XFS_IEXT_SWAP_RMAP_CNT);
if (error)
goto out;
}
@@ -1519,6 +1541,18 @@ xfs_swap_extents(
goto out_unlock;
}
+ /*
+ * The rmapbt implementation is unable to resume a swapext operation
+ * after a crash if the allocation unit size is larger than a block.
+ * This (deprecated) interface will not be upgraded to handle this
+ * situation. Defragmentation must be performed with the commit range
+ * ioctl.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(ip->i_mount)) {
+ error = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
error = xfs_qm_dqattach(ip);
if (error)
goto out_unlock;