Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
 fs/xfs/xfs_bmap_util.c | 240 ++++++++++++++++--------------
 1 file changed, 137 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 19e11d1da660..06ca11731e43 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -29,6 +29,8 @@
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
 #include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_zone_alloc.h"
 
 /* Kernel only BMAP related definitions and functions */
 
@@ -41,16 +43,12 @@ xfs_daddr_t
 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 {
 	if (XFS_IS_REALTIME_INODE(ip))
-		return XFS_FSB_TO_BB(ip->i_mount, fsb);
+		return xfs_rtb_to_daddr(ip->i_mount, fsb);
 	return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
 }
 
 /*
  * Routine to zero an extent on disk allocated to the specific inode.
- *
- * The VFS functions take a linearised filesystem block offset, so we have to
- * convert the sparse xfs fsb to the right format first.
- * VFS types are real funky, too.
  */
 int
 xfs_zero_extent(
@@ -58,15 +56,10 @@ xfs_zero_extent(
 	xfs_fsblock_t		start_fsb,
 	xfs_off_t		count_fsb)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
-	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
-	sector_t		block = XFS_BB_TO_FSBT(mp, sector);
-
-	return blkdev_issue_zeroout(target->bt_bdev,
-			block << (mp->m_super->s_blocksize_bits - 9),
-			count_fsb << (mp->m_super->s_blocksize_bits - 9),
-			GFP_KERNEL, 0);
+	return blkdev_issue_zeroout(xfs_inode_buftarg(ip)->bt_bdev,
+			xfs_fsb_to_db(ip, start_fsb),
+			XFS_FSB_TO_BB(ip->i_mount, count_fsb),
+			GFP_KERNEL, 0);
 }
 
 /*
@@ -111,7 +104,7 @@ xfs_bmap_count_blocks(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 	struct xfs_btree_cur	*cur;
-	xfs_extlen_t		btblocks = 0;
+	xfs_filblks_t		btblocks = 0;
 	int			error;
 
 	*nextents = 0;
@@ -331,8 +324,7 @@ xfs_getbmap(
 	}
 
 	if (xfs_get_extsz_hint(ip) ||
-	    (ip->i_diflags &
-	     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+	    (ip->i_diflags & XFS_DIFLAG_PREALLOC))
 		max_len = mp->m_super->s_maxbytes;
 	else
 		max_len = XFS_ISIZE(ip);
@@ -440,19 +432,20 @@ out_unlock_iolock:
  * if the ranges only partially overlap them, so it is up to the caller to
  * ensure that partial blocks are not passed in.
  */
-int
+void
 xfs_bmap_punch_delalloc_range(
 	struct xfs_inode	*ip,
+	int			whichfork,
 	xfs_off_t		start_byte,
-	xfs_off_t		end_byte)
+	xfs_off_t		end_byte,
+	struct xfs_zone_alloc_ctx *ac)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = &ip->i_df;
+	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
 	struct xfs_bmbt_irec	got, del;
 	struct xfs_iext_cursor	icur;
-	int			error = 0;
 
 	ASSERT(!xfs_need_iread_extents(ifp));
 
@@ -476,33 +469,46 @@ xfs_bmap_punch_delalloc_range(
 			continue;
 		}
 
-		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
-				&got, &del);
-		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
+		if (xfs_is_zoned_inode(ip) && ac) {
+			/*
+			 * In a zoned buffered write context we need to return
+			 * the punched delalloc allocations to the allocation
+			 * context. This allows reusing them in the following
+			 * iomap iterations.
+			 */
+			xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got,
+					&del, XFS_BMAPI_REMAP);
+			ac->reserved_blocks += del.br_blockcount;
+		} else {
+			xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got,
+					&del, 0);
+		}
+
+		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 	}
 
+	if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+		xfs_inode_clear_cowblocks_tag(ip);
+
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
 }
 
 /*
  * Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
  */
 bool
 xfs_can_free_eofblocks(
-	struct xfs_inode	*ip,
-	bool			force)
+	struct xfs_inode	*ip)
 {
-	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
+	bool			found_blocks = false;
 	xfs_fileoff_t		end_fsb;
 	xfs_fileoff_t		last_fsb;
-	int			nimaps = 1;
-	int			error;
+	struct xfs_bmbt_irec	imap;
+	struct xfs_iext_cursor	icur;
 
 	/*
 	 * Caller must either hold the exclusive io lock; or be inactivating
@@ -529,12 +535,11 @@ xfs_can_free_eofblocks(
 		return false;
 
 	/*
-	 * Do not free real preallocated or append-only files unless the file
-	 * has delalloc blocks and we are forced to remove them.
+	 * Do not free real extents in preallocated files unless the file has
+	 * delalloc blocks and we are forced to remove them.
 	 */
-	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
-		if (!force || ip->i_delayed_blks == 0)
-			return false;
+	if ((ip->i_diflags & XFS_DIFLAG_PREALLOC) && !ip->i_delayed_blks)
+		return false;
 
 	/*
 	 * Do not try to free post-EOF blocks if EOF is beyond the end of the
@@ -542,28 +547,24 @@ xfs_can_free_eofblocks(
 	 * forever.
 	 */
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
-	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
-		end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
+	if (xfs_inode_has_bigrtalloc(ip))
+		end_fsb = xfs_fileoff_roundup_rtx(mp, end_fsb);
 	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
 	if (last_fsb <= end_fsb)
 		return false;
 
 	/*
-	 * Look up the mapping for the first block past EOF. If we can't find
-	 * it, there's nothing to free.
+	 * Check if there is a post-EOF extent to free. If there are any
+	 * delalloc blocks attached to the inode (data fork delalloc
+	 * reservations or CoW extents of any kind), we need to free them so
+	 * that inactivation doesn't fail to erase them.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
-			0);
+	if (ip->i_delayed_blks ||
+	    xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
+		found_blocks = true;
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	if (error || nimaps == 0)
-		return false;
-
-	/*
-	 * If there's a real mapping there or there are delayed allocation
-	 * reservations, then we have post-EOF blocks to try to free.
-	 */
-	return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
+	return found_blocks;
 }
 
 /*
@@ -587,6 +588,22 @@ xfs_free_eofblocks(
 	/* Wait on dio to ensure i_size has settled. */
 	inode_dio_wait(VFS_I(ip));
 
+	/*
+	 * For preallocated files only free delayed allocations.
+	 *
+	 * Note that this means we also leave speculative preallocations in
+	 * place for preallocated files.
+	 */
+	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+		if (ip->i_delayed_blks) {
+			xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
+				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+				LLONG_MAX, NULL);
+		}
+		xfs_inode_clear_eofblocks_tag(ip);
+		return 0;
+	}
+
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error) {
 		ASSERT(xfs_is_shutdown(mp));
@@ -642,6 +659,9 @@ xfs_alloc_file_space(
 	xfs_bmbt_irec_t		imaps[1], *imapp;
 	int			error;
 
+	if (xfs_is_always_cow_inode(ip))
+		return 0;
+
 	trace_xfs_alloc_file_space(ip);
 
 	if (xfs_is_shutdown(mp))
@@ -713,41 +733,37 @@ xfs_alloc_file_space(
 		if (error)
 			break;
 
-		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+		error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
 				XFS_IEXT_ADD_NOSPLIT_CNT);
-		if (error == -EFBIG)
-			error = xfs_iext_count_upgrade(tp, ip,
-					XFS_IEXT_ADD_NOSPLIT_CNT);
-		if (error)
-			goto error;
-
-		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
-				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
-				&nimaps);
 		if (error)
 			goto error;
 
-		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-		error = xfs_trans_commit(tp);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			break;
-
 		/*
 		 * If the allocator cannot find a single free extent large
 		 * enough to cover the start block of the requested range,
-		 * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
+		 * xfs_bmapi_write will return -ENOSR.
 		 *
 		 * In that case we simply need to keep looping with the same
 		 * startoffset_fsb so that one of the following allocations
 		 * will eventually reach the requested range.
 		 */
-		if (nimaps) {
+		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+				&nimaps);
+		if (error) {
+			if (error != -ENOSR)
+				goto error;
+			error = 0;
+		} else {
 			startoffset_fsb += imapp->br_blockcount;
 			allocatesize_fsb -= imapp->br_blockcount;
 		}
+
+		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+		error = xfs_trans_commit(tp);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
 	return error;
@@ -775,10 +791,8 @@ xfs_unmap_extent(
 	if (error)
 		return error;
 
-	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
 			XFS_IEXT_PUNCH_HOLE_CNT);
-	if (error == -EFBIG)
-		error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
 	if (error)
 		goto out_trans_cancel;
 
@@ -803,14 +817,18 @@ xfs_flush_unmap_range(
 	xfs_off_t		offset,
 	xfs_off_t		len)
 {
-	struct xfs_mount	*mp = ip->i_mount;
 	struct inode		*inode = VFS_I(ip);
 	xfs_off_t		rounding, start, end;
 	int			error;
 
-	rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE);
-	start = round_down(offset, rounding);
-	end = round_up(offset + len, rounding) - 1;
+	/*
+	 * Make sure we extend the flush out to extent alignment
+	 * boundaries so any extent range overlapping the start/end
	 * of the modification we are about to do is clean and idle.
+	 */
+	rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
+	start = rounddown_64(offset, rounding);
+	end = roundup_64(offset + len, rounding) - 1;
 
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	if (error)
@@ -823,7 +841,8 @@ int
 xfs_free_file_space(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
-	xfs_off_t		len)
+	xfs_off_t		len,
+	struct xfs_zone_alloc_ctx *ac)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		startoffset_fsb;
@@ -839,13 +858,21 @@ xfs_free_file_space(
 	if (len <= 0)	/* if nothing being freed */
 		return 0;
 
+	/*
+	 * Now AIO and DIO have drained we flush and (if necessary) invalidate
+	 * the cached range over the first operation we are about to run.
+	 */
+	error = xfs_flush_unmap_range(ip, offset, len);
+	if (error)
+		return error;
+
 	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
 	/* We can only free complete realtime extents. */
-	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
-		startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
-		endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
+	if (xfs_inode_has_bigrtalloc(ip)) {
+		startoffset_fsb = xfs_fileoff_roundup_rtx(mp, startoffset_fsb);
+		endoffset_fsb = xfs_fileoff_rounddown_rtx(mp, endoffset_fsb);
 	}
 
 	/*
@@ -870,7 +897,7 @@ xfs_free_file_space(
 		return 0;
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-	error = xfs_zero_range(ip, offset, len, NULL);
+	error = xfs_zero_range(ip, offset, len, ac, NULL);
 	if (error)
 		return error;
 
@@ -893,14 +920,14 @@ xfs_prepare_shift(
 	struct xfs_inode	*ip,
 	loff_t			offset)
 {
-	struct xfs_mount	*mp = ip->i_mount;
+	unsigned int		rounding;
 	int			error;
 
 	/*
 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
 	 * into the accessible region of the file.
 	 */
-	if (xfs_can_free_eofblocks(ip, true)) {
+	if (xfs_can_free_eofblocks(ip)) {
 		error = xfs_free_eofblocks(ip);
 		if (error)
 			return error;
@@ -911,11 +938,13 @@ xfs_prepare_shift(
 	 * with the full range of the operation. If we don't, a COW writeback
 	 * completion could race with an insert, front merge with the start
 	 * extent (after split) during the shift and corrupt the file. Start
-	 * with the block just prior to the start to stabilize the boundary.
+	 * with the allocation unit just prior to the start to stabilize the
+	 * boundary.
 	 */
-	offset = round_down(offset, mp->m_sb.sb_blocksize);
+	rounding = xfs_inode_alloc_unitsize(ip);
+	offset = rounddown_64(offset, rounding);
 	if (offset)
-		offset -= mp->m_sb.sb_blocksize;
+		offset -= rounding;
 
 	/*
 	 * Writeback and invalidate cache for the remainder of the file as we're
@@ -956,7 +985,8 @@ int
 xfs_collapse_file_space(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
-	xfs_off_t		len)
+	xfs_off_t		len,
+	struct xfs_zone_alloc_ctx *ac)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -969,7 +999,7 @@ xfs_collapse_file_space(
 
 	trace_xfs_collapse_file_space(ip);
 
-	error = xfs_free_file_space(ip, offset, len);
+	error = xfs_free_file_space(ip, offset, len, ac);
 	if (error)
 		return error;
 
@@ -1054,10 +1084,8 @@ xfs_insert_file_space(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
+	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
 			XFS_IEXT_PUNCH_HOLE_CNT);
-	if (error == -EFBIG)
-		error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1175,7 +1203,7 @@ xfs_swap_extents_check_format(
 	 */
 	if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
 		if (xfs_inode_has_attr_fork(ip) &&
-		    XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
+		    xfs_bmap_bmdr_space(tifp->if_broot) > xfs_inode_fork_boff(ip))
			return -EINVAL;
 		if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
 			return -EINVAL;
@@ -1184,7 +1212,7 @@ xfs_swap_extents_check_format(
 	/* Reciprocal target->temp btree format checks */
 	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
 		if (xfs_inode_has_attr_fork(tip) &&
-		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
+		    xfs_bmap_bmdr_space(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
 			return -EINVAL;
 		if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
 			return -EINVAL;
@@ -1283,23 +1311,17 @@ xfs_swap_extent_rmap(
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
 
 			if (xfs_bmap_is_real_extent(&uirec)) {
-				error = xfs_iext_count_may_overflow(ip,
+				error = xfs_iext_count_extend(tp, ip,
 						XFS_DATA_FORK,
 						XFS_IEXT_SWAP_RMAP_CNT);
-				if (error == -EFBIG)
-					error = xfs_iext_count_upgrade(tp, ip,
-							XFS_IEXT_SWAP_RMAP_CNT);
 				if (error)
 					goto out;
 			}
 
 			if (xfs_bmap_is_real_extent(&irec)) {
-				error = xfs_iext_count_may_overflow(tip,
+				error = xfs_iext_count_extend(tp, tip,
 						XFS_DATA_FORK,
 						XFS_IEXT_SWAP_RMAP_CNT);
-				if (error == -EFBIG)
-					error = xfs_iext_count_upgrade(tp, ip,
-							XFS_IEXT_SWAP_RMAP_CNT);
 				if (error)
 					goto out;
 			}
@@ -1519,6 +1541,18 @@ xfs_swap_extents(
 		goto out_unlock;
 	}
 
+	/*
+	 * The rmapbt implementation is unable to resume a swapext operation
+	 * after a crash if the allocation unit size is larger than a block.
+	 * This (deprecated) interface will not be upgraded to handle this
+	 * situation. Defragmentation must be performed with the commit range
+	 * ioctl.
+	 */
+	if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(ip->i_mount)) {
+		error = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
 	error = xfs_qm_dqattach(ip);
 	if (error)
 		goto out_unlock;
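To illustrate the unit handling in the reworked xfs_zero_extent(): blkdev_issue_zeroout() already takes 512-byte basic blocks, so the new code passes the results of xfs_fsb_to_db() and XFS_FSB_TO_BB() straight through instead of converting the disk address back to a linear block index and re-scaling it by hand as the removed code did. A minimal userspace sketch of that arithmetic, assuming 4096-byte filesystem blocks; fsb_to_bb() is a hypothetical stand-in for the XFS_FSB_TO_BB() macro and ignores the allocation-group and rtgroup translation that the real xfs_fsb_to_db() also performs:

#include <stdint.h>
#include <stdio.h>

#define BB_SHIFT	9	/* XFS "basic blocks" are 512 bytes */

/* Hypothetical stand-in for XFS_FSB_TO_BB(): fs blocks -> 512B units */
static uint64_t fsb_to_bb(unsigned int blocklog, uint64_t fsb)
{
	return fsb << (blocklog - BB_SHIFT);
}

int main(void)
{
	unsigned int blocklog = 12;	/* 4096-byte filesystem blocks */
	uint64_t start_fsb = 100, count_fsb = 8;

	/*
	 * These two values correspond to the sector and nr_sects
	 * arguments the new xfs_zero_extent() hands to
	 * blkdev_issue_zeroout().
	 */
	printf("sector=%llu nr_sects=%llu\n",
	       (unsigned long long)fsb_to_bb(blocklog, start_fsb),
	       (unsigned long long)fsb_to_bb(blocklog, count_fsb));
	return 0;
}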
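The zoned-inode branch added to xfs_bmap_punch_delalloc_range() credits punched delalloc blocks back to the caller's struct xfs_zone_alloc_ctx instead of dropping the reservation, so later iomap iterations of the same buffered write can reuse it. A schematic of just that bookkeeping, with hypothetical stand-in types reduced to the fields the diff touches:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the structures used in the diff. */
struct zone_alloc_ctx {
	uint64_t	reserved_blocks;	/* cf. xfs_zone_alloc_ctx */
};

struct delalloc_extent {
	uint64_t	blockcount;	/* cf. xfs_bmbt_irec.br_blockcount */
};

/*
 * Schematic of the new behaviour: on a zoned inode with an allocation
 * context, punched delalloc blocks are credited back to the context
 * instead of being released.
 */
static void punch_delalloc(struct delalloc_extent *del, bool zoned,
			   struct zone_alloc_ctx *ac)
{
	if (zoned && ac)
		ac->reserved_blocks += del->blockcount;
	/* else: the reservation is simply dropped */
	del->blockcount = 0;
}

int main(void)
{
	struct zone_alloc_ctx ac = { .reserved_blocks = 4 };
	struct delalloc_extent del = { .blockcount = 12 };

	punch_delalloc(&del, true, &ac);
	printf("reserved_blocks now %llu\n",
	       (unsigned long long)ac.reserved_blocks);	/* prints 16 */
	return 0;
}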
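The restructured loop in xfs_alloc_file_space() depends on the new xfs_bmapi_write() contract quoted in the comment above: -ENOSR means no extent covering the start of the remaining range could be allocated, and the caller simply retries with the same start offset. A toy model of that control flow; mock_bmapi_write() is invented for illustration, and the per-iteration transaction allocation, quota reservation, and commit of the real loop are omitted:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy stand-in for xfs_bmapi_write(): it can satisfy at most a few
 * blocks per call and returns -ENOSR when nothing at the requested
 * offset can be allocated this time around.
 */
static int mock_bmapi_write(uint64_t off, uint64_t len, uint64_t *alloced,
			    int *calls)
{
	static const uint64_t avail[] = { 0, 3, 5, 100 };
	uint64_t got = avail[(*calls)++ % 4];

	(void)off;
	if (got == 0)
		return -ENOSR;		/* nothing allocated this time */
	*alloced = got < len ? got : len;
	return 0;
}

int main(void)
{
	uint64_t start = 0, remaining = 10, got = 0;
	int calls = 0, error;

	/* Same shape as the reworked loop in xfs_alloc_file_space(). */
	while (remaining > 0) {
		error = mock_bmapi_write(start, remaining, &got, &calls);
		if (error) {
			if (error != -ENOSR)
				return 1;	/* a real error: bail out */
			continue;		/* retry same start offset */
		}
		start += got;
		remaining -= got;
		printf("allocated %llu, %llu left\n",
		       (unsigned long long)got,
		       (unsigned long long)remaining);
	}
	return 0;
}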
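Finally, xfs_flush_unmap_range() now rounds the flush range out to the inode's allocation unit (xfs_inode_alloc_unitsize(), e.g. a multi-block realtime extent) rather than the filesystem block size, so whole allocation units around the modification are clean and idle before the unmap. The rounding arithmetic as a runnable sketch; the helpers mirror the semantics of the kernel's rounddown_64()/roundup_64(), and the 64KiB allocation unit is an arbitrary example value:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* Same semantics as the kernel's rounddown_64()/roundup_64() helpers. */
static uint64_t rounddown64(uint64_t x, uint64_t y) { return x - (x % y); }
static uint64_t roundup64(uint64_t x, uint64_t y)
{
	return rounddown64(x + y - 1, y);
}

int main(void)
{
	/* e.g. a realtime inode with a 16-block (64KiB) allocation unit */
	uint64_t alloc_unit = 65536;
	uint64_t offset = 70000, len = 3000;

	/* Mirrors the rounding now done in xfs_flush_unmap_range(). */
	uint64_t rounding = alloc_unit > PAGE_SIZE ? alloc_unit : PAGE_SIZE;
	uint64_t start = rounddown64(offset, rounding);
	uint64_t end = roundup64(offset + len, rounding) - 1;

	printf("flush [%llu, %llu]\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}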