Diffstat (limited to 'fs/xfs/libxfs/xfs_bmap.c')
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 1056 |
1 file changed, 464 insertions(+), 592 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 656c95a22f2e..d954f9b8071f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -34,11 +34,13 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" -#include "xfs_icache.h" #include "xfs_iomap.h" #include "xfs_health.h" #include "xfs_bmap_item.h" #include "xfs_symlink_remote.h" +#include "xfs_inode_util.h" +#include "xfs_rtgroup.h" +#include "xfs_zone_alloc.h" struct kmem_cache *xfs_bmap_intent_cache; @@ -78,9 +80,9 @@ xfs_bmap_compute_maxlevels( maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp), whichfork); if (whichfork == XFS_DATA_FORK) - sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + sz = xfs_bmdr_space_calc(MINDBTPTRS); else - sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); + sz = xfs_bmdr_space_calc(MINABTPTRS); maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; @@ -101,8 +103,8 @@ xfs_bmap_compute_attr_offset( struct xfs_mount *mp) { if (mp->m_sb.sb_inodesize == 256) - return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); - return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); + return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS); + return xfs_bmdr_space_calc(6 * MINABTPTRS); } STATIC int /* error */ @@ -169,18 +171,16 @@ xfs_bmbt_update( * Compute the worst-case number of indirect blocks that will be used * for ip's delayed extent of length "len". */ -STATIC xfs_filblks_t +xfs_filblks_t xfs_bmap_worst_indlen( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_filblks_t len) /* delayed extent length */ + struct xfs_inode *ip, /* incore inode pointer */ + xfs_filblks_t len) /* delayed extent length */ { - int level; /* btree level number */ - int maxrecs; /* maximum record count at this level */ - xfs_mount_t *mp; /* mount structure */ - xfs_filblks_t rval; /* return value */ + struct xfs_mount *mp = ip->i_mount; + int maxrecs = mp->m_bmap_dmxr[0]; + int level; + xfs_filblks_t rval; - mp = ip->i_mount; - maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); level++) { @@ -297,7 +297,7 @@ xfs_check_block( prevp = NULL; for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { dmxr = mp->m_bmap_dmxr[0]; - keyp = XFS_BMBT_KEY_ADDR(mp, block, i); + keyp = xfs_bmbt_key_addr(mp, block, i); if (prevp) { ASSERT(be64_to_cpu(prevp->br_startoff) < @@ -309,15 +309,15 @@ xfs_check_block( * Compare the block numbers to see if there are dups. 
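For context on the xfs_bmap_worst_indlen() change above: the calculation is a repeated ceiling division of the extent length by the per-level btree record capacity, summed across the levels. A minimal standalone sketch, with hypothetical maxrecs/maxlevels parameters standing in for mp->m_bmap_dmxr[] and XFS_BM_MAXLEVELS():

static unsigned long long
worst_indlen_sketch(
        unsigned long long      len,            /* delayed extent length */
        unsigned int            maxrecs,        /* records per btree block */
        int                     maxlevels)      /* tree height limit */
{
        unsigned long long      rval = 0;
        int                     level;

        for (level = 0; level < maxlevels; level++) {
                /* ceil(len / maxrecs): blocks needed at this level */
                len = (len + maxrecs - 1) / maxrecs;
                rval += len;
                if (len == 1)   /* one block maps everything above it */
                        return rval + maxlevels - level - 1;
                /* the real code switches to node capacity after level 0 */
        }
        return rval;
}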
*/ if (root) - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz); else - pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr); for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { if (root) - thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz); else - thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr); if (*thispa == *pp) { xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld", __func__, j, i, @@ -372,7 +372,7 @@ xfs_bmap_check_leaf_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLFSBLOCK); @@ -405,7 +405,7 @@ xfs_bmap_check_leaf_extents( */ xfs_check_block(block, mp, 0, 0); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) { xfs_btree_mark_sick(cur); @@ -445,14 +445,14 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in this one. */ - ep = XFS_BMBT_REC_ADDR(mp, block, 1); + ep = xfs_bmbt_rec_addr(mp, block, 1); if (i) { ASSERT(xfs_bmbt_disk_get_startoff(&last) + xfs_bmbt_disk_get_blockcount(&last) <= xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { - nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + nextp = xfs_bmbt_rec_addr(mp, block, j + 1); ASSERT(xfs_bmbt_disk_get_startoff(ep) + xfs_bmbt_disk_get_blockcount(ep) <= xfs_bmbt_disk_get_startoff(nextp)); @@ -583,9 +583,9 @@ xfs_bmap_btree_to_extents( ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); #ifdef DEBUG if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) { @@ -604,7 +604,7 @@ xfs_bmap_btree_to_extents( xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) return error; @@ -613,7 +613,7 @@ xfs_bmap_btree_to_extents( xfs_trans_binval(tp, cbp); if (cur->bc_levels[0].bp == cbp) cur->bc_levels[0].bp = NULL; - xfs_iroot_realloc(ip, -1, whichfork); + xfs_bmap_broot_realloc(ip, whichfork, 0); ASSERT(ifp->if_broot == NULL); ifp->if_format = XFS_DINODE_FMT_EXTENTS; *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork); @@ -657,12 +657,11 @@ xfs_bmap_extents_to_btree( * Make space in the inode incore. This needs to be undone if we fail * to expand the root. */ - xfs_iroot_realloc(ip, 1, whichfork); + block = xfs_bmap_broot_realloc(ip, whichfork, 1); /* * Fill in the root. */ - block = ifp->if_broot; xfs_bmbt_init_block(ip, block, NULL, 1, 1); /* * Need a cursor. Can't allocate until bb_level is filled in. 
@@ -713,7 +712,7 @@ xfs_bmap_extents_to_btree( for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue; - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt); + arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt); xfs_bmbt_disk_set_all(arp, &rec); cnt++; } @@ -723,10 +722,10 @@ xfs_bmap_extents_to_btree( /* * Fill in the root key and pointer. */ - kp = XFS_BMBT_KEY_ADDR(mp, block, 1); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + kp = xfs_bmbt_key_addr(mp, block, 1); + arp = xfs_bmbt_rec_addr(mp, ablock, 1); kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur, be16_to_cpu(block->bb_level))); *pp = cpu_to_be64(args.fsbno); @@ -744,7 +743,7 @@ xfs_bmap_extents_to_btree( out_unreserve_dquot: xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); out_root_realloc: - xfs_iroot_realloc(ip, -1, whichfork); + xfs_bmap_broot_realloc(ip, whichfork, 0); ifp->if_format = XFS_DINODE_FMT_EXTENTS; ASSERT(ifp->if_broot == NULL); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); @@ -779,7 +778,7 @@ xfs_bmap_local_to_extents_empty( } -STATIC int /* error */ +int /* error */ xfs_bmap_local_to_extents( xfs_trans_t *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ @@ -789,7 +788,8 @@ xfs_bmap_local_to_extents( void (*init_fn)(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, - struct xfs_ifork *ifp)) + struct xfs_ifork *ifp, void *priv), + void *priv) { int error = 0; int flags; /* logging flags returned */ @@ -850,7 +850,7 @@ xfs_bmap_local_to_extents( * log here. Note that init_fn must also set the buffer log item type * correctly. */ - init_fn(tp, bp, ip, ifp); + init_fn(tp, bp, ip, ifp, priv); /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); @@ -894,7 +894,7 @@ xfs_bmap_add_attrfork_btree( mp = ip->i_mount; - if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip)) + if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip)) *flags |= XFS_ILOG_DBROOT; else { cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); @@ -976,13 +976,14 @@ xfs_bmap_add_attrfork_local( dargs.total = dargs.geo->fsbcount; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; + dargs.owner = ip->i_ino; return xfs_dir2_sf_to_block(&dargs); } if (S_ISLNK(VFS_I(ip)->i_mode)) return xfs_bmap_local_to_extents(tp, ip, 1, flags, - XFS_DATA_FORK, - xfs_symlink_local_to_remote); + XFS_DATA_FORK, xfs_symlink_local_to_remote, + NULL); /* should only be called for types that support local format data */ ASSERT(0); @@ -1023,40 +1024,30 @@ xfs_bmap_set_attrforkoff( } /* - * Convert inode from non-attributed to attributed. - * Must not be in a transaction, ip must not be locked. + * Convert inode from non-attributed to attributed. Caller must hold the + * ILOCK_EXCL and the file cannot have an attr fork. 
*/ int /* error code */ xfs_bmap_add_attrfork( - xfs_inode_t *ip, /* incore inode pointer */ + struct xfs_trans *tp, + struct xfs_inode *ip, /* incore inode pointer */ int size, /* space new attribute needs */ int rsvd) /* xact may use reserved blks */ { - xfs_mount_t *mp; /* mount structure */ - xfs_trans_t *tp; /* transaction pointer */ - int blks; /* space reservation */ + struct xfs_mount *mp = tp->t_mountp; int version = 1; /* superblock attr version */ int logflags; /* logging flags */ int error; /* error return value */ - ASSERT(xfs_inode_has_attr_fork(ip) == 0); - - mp = ip->i_mount; - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); - - blks = XFS_ADDAFORK_SPACE_RES(mp); - - error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0, - rsvd, &tp); - if (error) - return error; - if (xfs_inode_has_attr_fork(ip)) - goto trans_cancel; + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + if (!xfs_is_metadir_inode(ip)) + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + ASSERT(!xfs_inode_has_attr_fork(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_bmap_set_attrforkoff(ip, size, &version); if (error) - goto trans_cancel; + return error; xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); logflags = 0; @@ -1077,7 +1068,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto trans_cancel; + return error; if (!xfs_has_attr(mp) || (!xfs_has_attr2(mp) && version == 2)) { bool log_sb = false; @@ -1096,14 +1087,7 @@ xfs_bmap_add_attrfork( xfs_log_sb(tp); } - error = xfs_trans_commit(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; - -trans_cancel: - xfs_trans_cancel(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; + return 0; } /* @@ -1175,7 +1159,7 @@ xfs_iread_bmbt_block( } /* Copy records into the incore cache. */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); + frp = xfs_bmbt_rec_addr(mp, block, 1); for (j = 0; j < num_recs; j++, frp++, ir->loaded++) { struct xfs_bmbt_irec new; xfs_failaddr_t fa; @@ -1438,6 +1422,24 @@ xfs_bmap_last_offset( * Extent tree manipulation functions used during allocation. */ +static inline bool +xfs_bmap_same_rtgroup( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *left, + struct xfs_bmbt_irec *right) +{ + struct xfs_mount *mp = ip->i_mount; + + if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) { + if (xfs_rtb_to_rgno(mp, left->br_startblock) != + xfs_rtb_to_rgno(mp, right->br_startblock)) + return false; + } + + return true; +} + /* * Convert a delayed allocation to a real allocation. 
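With the xfs_bmap_add_attrfork() rework above, the function no longer allocates its own transaction; that setup moves to the caller. A sketch of what a caller now looks like, reconstructed from the removed lines (condensed, not the actual caller code):

int
add_attrfork_example(
        struct xfs_inode        *ip,
        int                     size,
        int                     rsvd)
{
        struct xfs_mount        *mp = ip->i_mount;
        unsigned int            blks = XFS_ADDAFORK_SPACE_RES(mp);
        struct xfs_trans        *tp;
        int                     error;

        /* allocates the transaction and takes ILOCK_EXCL */
        error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks,
                        0, rsvd, &tp);
        if (error)
                return error;

        if (!xfs_inode_has_attr_fork(ip))
                error = xfs_bmap_add_attrfork(tp, ip, size, rsvd);
        if (error) {
                xfs_trans_cancel(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                return error;
        }

        error = xfs_trans_commit(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
}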
*/ @@ -1507,7 +1509,8 @@ xfs_bmap_add_extent_delay_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new)) state |= BMAP_LEFT_CONTIG; /* @@ -1531,7 +1534,8 @@ xfs_bmap_add_extent_delay_real( (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= XFS_MAX_BMBT_EXTLEN)) + <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -1586,6 +1590,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: @@ -1616,6 +1621,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1650,6 +1656,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: @@ -1684,6 +1691,7 @@ xfs_bmap_add_extent_delay_real( goto done; } } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: @@ -1722,6 +1730,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + ASSERT(da_new <= da_old); break; case BMAP_LEFT_FILLING: @@ -1812,6 +1821,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); xfs_iext_next(ifp, &bma->icur); xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT); + ASSERT(da_new <= da_old); break; case BMAP_RIGHT_FILLING: @@ -1861,6 +1871,7 @@ xfs_bmap_add_extent_delay_real( PREV.br_blockcount = temp; xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); xfs_iext_next(ifp, &bma->icur); + ASSERT(da_new <= da_old); break; case 0: @@ -1975,7 +1986,7 @@ xfs_bmap_add_extent_delay_real( } if (da_new != da_old) - xfs_mod_delalloc(mp, (int64_t)da_new - da_old); + xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old); if (bma->cur) { da_new += bma->cur->bc_bmap.allocated; @@ -1983,11 +1994,10 @@ xfs_bmap_add_extent_delay_real( } /* adjust for changes in reserved delayed indirect blocks */ - if (da_new != da_old) { - ASSERT(state == 0 || da_new < da_old); - error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), - false); - } + if (da_new < da_old) + xfs_add_fdblocks(mp, da_old - da_new); + else if (da_new > da_old) + error = xfs_dec_fdblocks(mp, da_new - da_old, true); xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); done: @@ -2070,7 +2080,8 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new)) state |= BMAP_LEFT_CONTIG; /* @@ -2094,7 +2105,8 @@ xfs_bmap_add_extent_unwritten_real( (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= XFS_MAX_BMBT_EXTLEN)) + <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT)) state |= BMAP_RIGHT_CONTIG; /* @@ -2558,146 +2570,6 @@ done: } /* - * 
Convert a hole to a delayed allocation. - */ -STATIC void -xfs_bmap_add_extent_hole_delay( - xfs_inode_t *ip, /* incore inode pointer */ - int whichfork, - struct xfs_iext_cursor *icur, - xfs_bmbt_irec_t *new) /* new data to add to file extents */ -{ - struct xfs_ifork *ifp; /* inode fork pointer */ - xfs_bmbt_irec_t left; /* left neighbor extent entry */ - xfs_filblks_t newlen=0; /* new indirect size */ - xfs_filblks_t oldlen=0; /* old indirect size */ - xfs_bmbt_irec_t right; /* right neighbor extent entry */ - uint32_t state = xfs_bmap_fork_to_state(whichfork); - xfs_filblks_t temp; /* temp for indirect calculations */ - - ifp = xfs_ifork_ptr(ip, whichfork); - ASSERT(isnullstartblock(new->br_startblock)); - - /* - * Check and set flags if this segment has a left neighbor - */ - if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { - state |= BMAP_LEFT_VALID; - if (isnullstartblock(left.br_startblock)) - state |= BMAP_LEFT_DELAY; - } - - /* - * Check and set flags if the current (right) segment exists. - * If it doesn't exist, we're converting the hole at end-of-file. - */ - if (xfs_iext_get_extent(ifp, icur, &right)) { - state |= BMAP_RIGHT_VALID; - if (isnullstartblock(right.br_startblock)) - state |= BMAP_RIGHT_DELAY; - } - - /* - * Set contiguity flags on the left and right neighbors. - * Don't let extents get too large, even if the pieces are contiguous. - */ - if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && - left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) - state |= BMAP_LEFT_CONTIG; - - if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && - new->br_startoff + new->br_blockcount == right.br_startoff && - new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && - (!(state & BMAP_LEFT_CONTIG) || - (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) - state |= BMAP_RIGHT_CONTIG; - - /* - * Switch out based on the contiguity flags. - */ - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with delayed allocations - * on the left and on the right. - * Merge all three into a single extent record. - */ - temp = left.br_blockcount + new->br_blockcount + - right.br_blockcount; - - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - oldlen); - left.br_startblock = nullstartblock(newlen); - left.br_blockcount = temp; - - xfs_iext_remove(ip, icur, state); - xfs_iext_prev(ifp, icur); - xfs_iext_update_extent(ip, state, icur, &left); - break; - - case BMAP_LEFT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the left. - * Merge the new allocation with the left neighbor. - */ - temp = left.br_blockcount + new->br_blockcount; - - oldlen = startblockval(left.br_startblock) + - startblockval(new->br_startblock); - newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - oldlen); - left.br_blockcount = temp; - left.br_startblock = nullstartblock(newlen); - - xfs_iext_prev(ifp, icur); - xfs_iext_update_extent(ip, state, icur, &left); - break; - - case BMAP_RIGHT_CONTIG: - /* - * New allocation is contiguous with a delayed allocation - * on the right. - * Merge the new allocation with the right neighbor. 
- */ - temp = new->br_blockcount + right.br_blockcount; - oldlen = startblockval(new->br_startblock) + - startblockval(right.br_startblock); - newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - oldlen); - right.br_startoff = new->br_startoff; - right.br_startblock = nullstartblock(newlen); - right.br_blockcount = temp; - xfs_iext_update_extent(ip, state, icur, &right); - break; - - case 0: - /* - * New allocation is not contiguous with another - * delayed allocation. - * Insert a new entry. - */ - oldlen = newlen = 0; - xfs_iext_insert(ip, icur, new, state); - break; - } - if (oldlen != newlen) { - ASSERT(oldlen > newlen); - xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen), - false); - /* - * Nothing to do for disk quota accounting here. - */ - xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen); - } -} - -/* * Convert a hole to a real allocation. */ STATIC int /* error */ @@ -2754,7 +2626,8 @@ xfs_bmap_add_extent_hole_real( left.br_startoff + left.br_blockcount == new->br_startoff && left.br_startblock + left.br_blockcount == new->br_startblock && left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && @@ -2764,7 +2637,8 @@ xfs_bmap_add_extent_hole_real( new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || left.br_blockcount + new->br_blockcount + - right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -3121,6 +2995,30 @@ xfs_bmap_extsize_align( return 0; } +static inline bool +xfs_bmap_adjacent_valid( + struct xfs_bmalloca *ap, + xfs_fsblock_t x, + xfs_fsblock_t y) +{ + struct xfs_mount *mp = ap->ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ap->ip) && + (ap->datatype & XFS_ALLOC_USERDATA)) { + if (!xfs_has_rtgroups(mp)) + return x < mp->m_sb.sb_rblocks; + + return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) && + xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount && + xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents; + + } + + return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks; +} + #define XFS_ALLOC_GAP_UNITS 4 /* returns true if ap->blkno was modified */ @@ -3128,36 +3026,25 @@ bool xfs_bmap_adjacent( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_mount_t *mp; /* mount point structure */ - int rt; /* true if inode is realtime */ - -#define ISVALID(x,y) \ - (rt ? \ - (x) < mp->m_sb.sb_rblocks : \ - XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ - XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) - - mp = ap->ip->i_mount; - rt = XFS_IS_REALTIME_INODE(ap->ip) && - (ap->datatype & XFS_ALLOC_USERDATA); + xfs_fsblock_t adjust; /* adjustment to block numbers */ + /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. 
*/ if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && !isnullstartblock(ap->prev.br_startblock) && - ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, - ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, + ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; /* * Adjust for the gap between prevp and us. */ adjust = ap->offset - (ap->prev.br_startoff + ap->prev.br_blockcount); - if (adjust && - ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust, + ap->prev.br_startblock)) ap->blkno += adjust; return true; } @@ -3180,7 +3067,8 @@ xfs_bmap_adjacent( !isnullstartblock(ap->prev.br_startblock) && (prevbno = ap->prev.br_startblock + ap->prev.br_blockcount) && - ISVALID(prevbno, ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, prevbno, + ap->prev.br_startblock)) { /* * Calculate gap to end of previous block. */ @@ -3196,8 +3084,8 @@ xfs_bmap_adjacent( * number, then just use the end of the previous block. */ if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(prevbno + prevdiff, - ap->prev.br_startblock)) + xfs_bmap_adjacent_valid(ap, prevbno + prevdiff, + ap->prev.br_startblock)) prevbno += adjust; else prevdiff += adjust; @@ -3229,9 +3117,11 @@ xfs_bmap_adjacent( * offset by our length. */ if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(gotbno - gotdiff, gotbno)) + xfs_bmap_adjacent_valid(ap, gotbno - gotdiff, + gotbno)) gotbno -= adjust; - else if (ISVALID(gotbno - ap->length, gotbno)) { + else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length, + gotbno)) { gotbno -= ap->length; gotdiff += adjust - ap->length; } else @@ -3259,7 +3149,7 @@ xfs_bmap_adjacent( return true; } } -#undef ISVALID + return false; } @@ -3280,7 +3170,7 @@ xfs_bmap_longest_free_extent( } longest = xfs_alloc_longest_free_extent(pag, - xfs_alloc_min_freelist(pag->pag_mount, pag), + xfs_alloc_min_freelist(pag_mount(pag), pag), xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); if (*blen < longest) *blen = longest; @@ -3370,7 +3260,7 @@ xfs_bmap_alloc_account( * yet. */ if (ap->wasdel) { - xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length); + xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); return; } @@ -3394,7 +3284,7 @@ xfs_bmap_alloc_account( xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) { ap->ip->i_delayed_blks -= ap->length; - xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length); + xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT; } else { fld = isrt ? 
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; @@ -3422,6 +3312,11 @@ xfs_bmap_compute_alignments( align = xfs_get_cowextsz_hint(ap->ip); else if (ap->datatype & XFS_ALLOC_USERDATA) align = xfs_get_extsz_hint(ap->ip); + + /* Try to align start block to any minimum allocation alignment */ + if (align > 1 && (ap->flags & XFS_BMAPI_EXTSZALIGN)) + args->alignment = align; + if (align) { if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, @@ -3477,31 +3372,19 @@ xfs_bmap_process_allocated_extent( xfs_bmap_alloc_account(ap); } -#ifdef DEBUG static int xfs_bmap_exact_minlen_extent_alloc( - struct xfs_bmalloca *ap) + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) { - struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp }; - xfs_fileoff_t orig_offset; - xfs_extlen_t orig_length; - int error; - - ASSERT(ap->length); - if (ap->minlen != 1) { - ap->blkno = NULLFSBLOCK; - ap->length = 0; + args->fsbno = NULLFSBLOCK; return 0; } - orig_offset = ap->offset; - orig_length = ap->length; - - args.alloc_minlen_only = 1; - - xfs_bmap_compute_alignments(ap, &args); + args->alloc_minlen_only = 1; + args->minlen = args->maxlen = ap->minlen; + args->total = ap->total; /* * Unlike the longest extent available in an AG, we don't track @@ -3511,39 +3394,16 @@ xfs_bmap_exact_minlen_extent_alloc( * we need not be concerned about a drop in performance in * "debug only" code paths. */ - ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); - - args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - args.minlen = args.maxlen = ap->minlen; - args.total = ap->total; - - args.alignment = 1; - args.minalignslop = 0; - - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.resv = XFS_AG_RESV_NONE; - args.datatype = ap->datatype; - - error = xfs_alloc_vextent_first_ag(&args, ap->blkno); - if (error) - return error; - - if (args.fsbno != NULLFSBLOCK) { - xfs_bmap_process_allocated_extent(ap, &args, orig_offset, - orig_length); - } else { - ap->blkno = NULLFSBLOCK; - ap->length = 0; - } + ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0); - return 0; + /* + * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG + * iteration and then drops args->total to args->minlen, which might be + * required to find an allocation for the transaction reservation when + * the file system is very full. + */ + return xfs_bmap_btalloc_low_space(ap, args); } -#else - -#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED) - -#endif /* * If we are not low on available data blocks and we are allocating at @@ -3566,12 +3426,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. 
*/ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3739,7 +3599,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for @@ -3801,8 +3662,11 @@ xfs_bmap_btalloc( /* Trim the allocation back to the maximum an AG can fit. */ args.maxlen = min(ap->length, mp->m_ag_max_usable); - if ((ap->datatype & XFS_ALLOC_USERDATA) && - xfs_inode_is_filestream(ap->ip)) + if (unlikely(XFS_TEST_ERROR(false, mp, + XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) + error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); + else if ((ap->datatype & XFS_ALLOC_USERDATA) && + xfs_inode_is_filestream(ap->ip)) error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); else error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); @@ -4038,152 +3902,6 @@ xfs_bmapi_read( return 0; } -/* - * Add a delayed allocation extent to an inode. Blocks are reserved from the - * global pool and the extent inserted into the inode in-core extent tree. - * - * On entry, got refers to the first extent beyond the offset of the extent to - * allocate or eof is specified if no such extent exists. On return, got refers - * to the extent record that was inserted to the inode fork. - * - * Note that the allocated extent may have been merged with contiguous extents - * during insertion into the inode fork. Thus, got does not reflect the current - * state of the inode fork on return. If necessary, the caller can use lastx to - * look up the updated record in the inode fork. - */ -int -xfs_bmapi_reserve_delalloc( - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t off, - xfs_filblks_t len, - xfs_filblks_t prealloc, - struct xfs_bmbt_irec *got, - struct xfs_iext_cursor *icur, - int eof) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); - xfs_extlen_t alen; - xfs_extlen_t indlen; - int error; - xfs_fileoff_t aoff = off; - - /* - * Cap the alloc length. Keep track of prealloc so we know whether to - * tag the inode before we return. - */ - alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN); - if (!eof) - alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); - if (prealloc && alen >= len) - prealloc = alen - len; - - /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) { - struct xfs_bmbt_irec prev; - xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); - - if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) - prev.br_startoff = NULLFILEOFF; - - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, - 1, 0, &aoff, &alen); - ASSERT(!error); - } - - /* - * Make a transaction-less quota reservation for delayed allocation - * blocks. This number gets adjusted later. We return if we haven't - * allocated blocks already inside this loop. - */ - error = xfs_quota_reserve_blkres(ip, alen); - if (error) - return error; - - /* - * Split changing sb for alen and indlen since they could be coming - * from different places. 
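Two hunks above cooperate: xfs_bmap_btalloc_at_eof() now keys off ap->eof rather than ap->offset, and xfs_bmap_btalloc_best_length() clears ap->eof when xfs_bmap_adjacent() produced no usable hint. The resulting policy in outline (try_exact() and try_aligned() are hypothetical stand-ins for the exact-bno and stripe-aligned xfs_alloc_vextent_* paths):

int try_exact(struct xfs_bmalloca *ap, struct xfs_alloc_arg *args);
int try_aligned(struct xfs_bmalloca *ap, struct xfs_alloc_arg *args);

static int
alloc_at_eof_outline(
        struct xfs_bmalloca     *ap,
        struct xfs_alloc_arg    *args)
{
        int                     error;

        if (ap->eof) {  /* at EOF, and xfs_bmap_adjacent() set ap->blkno */
                error = try_exact(ap, args);
                if (error || args->fsbno != NULLFSBLOCK)
                        return error;
        }
        /* first allocation in the file, or the exact attempt missed */
        return try_aligned(ap, args);
}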
- */ - indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); - ASSERT(indlen > 0); - - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); - if (error) - goto out_unreserve_quota; - - error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false); - if (error) - goto out_unreserve_blocks; - - - ip->i_delayed_blks += alen; - xfs_mod_delalloc(ip->i_mount, alen + indlen); - - got->br_startoff = aoff; - got->br_startblock = nullstartblock(indlen); - got->br_blockcount = alen; - got->br_state = XFS_EXT_NORM; - - xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got); - - /* - * Tag the inode if blocks were preallocated. Note that COW fork - * preallocation can occur at the start or end of the extent, even when - * prealloc == 0, so we must also check the aligned offset and length. - */ - if (whichfork == XFS_DATA_FORK && prealloc) - xfs_inode_set_eofblocks_tag(ip); - if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) - xfs_inode_set_cowblocks_tag(ip); - - return 0; - -out_unreserve_blocks: - xfs_mod_fdblocks(mp, alen, false); -out_unreserve_quota: - if (XFS_IS_QUOTA_ON(mp)) - xfs_quota_unreserve_blkres(ip, alen); - return error; -} - -static int -xfs_bmap_alloc_userdata( - struct xfs_bmalloca *bma) -{ - struct xfs_mount *mp = bma->ip->i_mount; - int whichfork = xfs_bmapi_whichfork(bma->flags); - int error; - - /* - * Set the data type being allocated. For the data fork, the first data - * in the file is treated differently to all other allocations. For the - * attribute fork, we only need to ensure the allocated range is not on - * the busy list. - */ - bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { - bma->datatype |= XFS_ALLOC_USERDATA; - if (bma->offset == 0) - bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; - - if (mp->m_dalign && bma->length >= mp->m_dalign) { - error = xfs_bmap_isaeof(bma, whichfork); - if (error) - return error; - } - - if (XFS_IS_REALTIME_INODE(bma->ip)) - return xfs_bmap_rtalloc(bma); - } - - if (unlikely(XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) - return xfs_bmap_exact_minlen_extent_alloc(bma); - - return xfs_bmap_btalloc(bma); -} - static int xfs_bmapi_allocate( struct xfs_bmalloca *bma) @@ -4191,43 +3909,51 @@ xfs_bmapi_allocate( struct xfs_mount *mp = bma->ip->i_mount; int whichfork = xfs_bmapi_whichfork(bma->flags); struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork); - int tmp_logflags = 0; int error; ASSERT(bma->length > 0); - - /* - * For the wasdelay case, we could also just allocate the stuff asked - * for in this bmap call but that wouldn't be as good. 
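The xfs_bmapi_reserve_delalloc() body removed above was a classic reserve-then-unwind sequence: quota first, then the data blocks, then the worst-case indirect blocks, undone in reverse order on failure. Condensed to a sketch with hypothetical helper names:

static int
reserve_delalloc_sketch(
        uint64_t        alen,   /* data blocks for the extent itself */
        uint64_t        indlen) /* worst-case indirect (btree) blocks */
{
        int             error;

        error = quota_reserve(alen);            /* hypothetical helper */
        if (error)
                return error;
        error = fdblocks_reserve(alen);         /* hypothetical helper */
        if (error)
                goto out_unreserve_quota;
        error = fdblocks_reserve(indlen);
        if (error)
                goto out_unreserve_blocks;
        return 0;

out_unreserve_blocks:
        fdblocks_unreserve(alen);
out_unreserve_quota:
        quota_unreserve(alen);
        return error;
}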
- */ - if (bma->wasdel) { - bma->length = (xfs_extlen_t)bma->got.br_blockcount; - bma->offset = bma->got.br_startoff; - if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev)) - bma->prev.br_startoff = NULLFILEOFF; - } else { - bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN); - if (!bma->eof) - bma->length = XFS_FILBLKS_MIN(bma->length, - bma->got.br_startoff - bma->offset); - } + ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN); if (bma->flags & XFS_BMAPI_CONTIG) bma->minlen = bma->length; else bma->minlen = 1; - if (bma->flags & XFS_BMAPI_METADATA) { - if (unlikely(XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) - error = xfs_bmap_exact_minlen_extent_alloc(bma); - else - error = xfs_bmap_btalloc(bma); - } else { - error = xfs_bmap_alloc_userdata(bma); + if (!(bma->flags & XFS_BMAPI_METADATA)) { + /* + * For the data and COW fork, the first data in the file is + * treated differently to all other allocations. For the + * attribute fork, we only need to ensure the allocated range + * is not on the busy list. + */ + bma->datatype = XFS_ALLOC_NOBUSY; + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { + bma->datatype |= XFS_ALLOC_USERDATA; + if (bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; + + if (mp->m_dalign && bma->length >= mp->m_dalign) { + error = xfs_bmap_isaeof(bma, whichfork); + if (error) + return error; + } + } } - if (error || bma->blkno == NULLFSBLOCK) + + if ((bma->datatype & XFS_ALLOC_USERDATA) && + XFS_IS_REALTIME_INODE(bma->ip)) + error = xfs_bmap_rtalloc(bma); + else + error = xfs_bmap_btalloc(bma); + if (error) return error; + if (bma->blkno == NULLFSBLOCK) + return -ENOSPC; + + if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) { + xfs_bmap_mark_sick(bma->ip, whichfork); + return -EFSCORRUPTED; + } if (bma->flags & XFS_BMAPI_ZERO) { error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); @@ -4260,8 +3986,6 @@ xfs_bmapi_allocate( error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, whichfork, &bma->icur, &bma->cur, &bma->got, &bma->logflags, bma->flags); - - bma->logflags |= tmp_logflags; if (error) return error; @@ -4406,6 +4130,15 @@ xfs_bmapi_finish( * extent state if necessary. Details behaviour is controlled by the flags * parameter. Only allocates blocks from a single allocation group, to avoid * locking problems. + * + * Returns 0 on success and places the extent mappings in mval. nmaps is used + * as an input/output parameter where the caller specifies the maximum number + * of mappings that may be returned and xfs_bmapi_write passes back the number + * of mappings (including existing mappings) it found. + * + * Returns a negative error code on failure, including -ENOSPC when it could not + * allocate any blocks and -ENOSR when it did allocate blocks to convert a + * delalloc range, but those blocks were before the passed in range. */ int xfs_bmapi_write( @@ -4524,28 +4257,42 @@ xfs_bmapi_write( * allocation length request (which can be 64 bits in * length) and the bma length request, which is * xfs_extlen_t and therefore 32 bits. Hence we have to - * check for 32-bit overflows and handle them here. + * be careful and do the min() using the larger type to + * avoid overflows. 
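The rewritten overflow comment above, made concrete: XFS_FILBLKS_MIN() compares in the 64-bit xfs_filblks_t type, whereas casting a large length to the 32-bit xfs_extlen_t first can wrap it down to a tiny value (numbers hypothetical):

static void
min_width_demo(void)
{
        xfs_filblks_t   len = (1ULL << 33) + 10;        /* needs > 32 bits */
        xfs_extlen_t    bad;
        xfs_filblks_t   good;

        /* WRONG: min_t() casts both sides to 32 bits, so len wraps to 10 */
        bad = min_t(xfs_extlen_t, len, XFS_MAX_BMBT_EXTLEN);

        /* RIGHT: compare as 64-bit xfs_filblks_t; the result then fits */
        good = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
}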
*/ - if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN) - bma.length = XFS_MAX_BMBT_EXTLEN; - else - bma.length = len; + bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN); + + if (wasdelay) { + bma.length = XFS_FILBLKS_MIN(bma.length, + bma.got.br_blockcount - + (bno - bma.got.br_startoff)); + } else { + if (!eof) + bma.length = XFS_FILBLKS_MIN(bma.length, + bma.got.br_startoff - bno); + } - ASSERT(len > 0); ASSERT(bma.length > 0); error = xfs_bmapi_allocate(&bma); - if (error) + if (error) { + /* + * If we already allocated space in a previous + * iteration return what we go so far when + * running out of space. + */ + if (error == -ENOSPC && bma.nallocs) + break; goto error0; - if (bma.blkno == NULLFSBLOCK) - break; + } /* * If this is a CoW allocation, record the data in * the refcount btree for orphan recovery. */ if (whichfork == XFS_COW_FORK) - xfs_refcount_alloc_cow_extent(tp, bma.blkno, - bma.length); + xfs_refcount_alloc_cow_extent(tp, + XFS_IS_REALTIME_INODE(ip), + bma.blkno, bma.length); } /* Deal with the allocated space we found. */ @@ -4575,7 +4322,6 @@ xfs_bmapi_write( if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) eof = true; } - *nmap = n; error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, whichfork); @@ -4586,7 +4332,22 @@ xfs_bmapi_write( ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork)); xfs_bmapi_finish(&bma, whichfork, 0); xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, - orig_nmap, *nmap); + orig_nmap, n); + + /* + * When converting delayed allocations, xfs_bmapi_allocate ignores + * the passed in bno and always converts from the start of the found + * delalloc extent. + * + * To avoid a successful return with *nmap set to 0, return the magic + * -ENOSR error code for this particular case so that the caller can + * handle it. + */ + if (!n) { + ASSERT(bma.nallocs >= *nmap); + return -ENOSR; + } + *nmap = n; return 0; error0: xfs_bmapi_finish(&bma, whichfork, error); @@ -4599,8 +4360,8 @@ error0: * invocations to allocate the target offset if a large enough physical extent * is not available. */ -int -xfs_bmapi_convert_delalloc( +static int +xfs_bmapi_convert_one_delalloc( struct xfs_inode *ip, int whichfork, xfs_off_t offset, @@ -4630,11 +4391,8 @@ xfs_bmapi_convert_delalloc( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - error = xfs_iext_count_may_overflow(ip, whichfork, + error = xfs_iext_count_extend(tp, ip, whichfork, XFS_IEXT_ADD_NOSPLIT_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto out_trans_cancel; @@ -4657,19 +4415,25 @@ xfs_bmapi_convert_delalloc( if (!isnullstartblock(bma.got.br_startblock)) { xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); - *seq = READ_ONCE(ifp->if_seq); + if (seq) + *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } bma.tp = tp; bma.ip = ip; bma.wasdel = true; - bma.offset = bma.got.br_startoff; - bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, - XFS_MAX_BMBT_EXTLEN); bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); /* + * Always allocate convert from the start of the delalloc extent even if + * that is outside the passed in range to create large contiguous + * extents on disk. 
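A worked scenario for the -ENOSR convention documented above (numbers hypothetical): a delalloc extent covers file blocks 0..99 and the caller asked for block 90, but fragmentation limited this pass to blocks 0..49. Every mapping returned then ends before the requested offset, so n stays 0 even though blocks were allocated. A hypothetical caller-side shape, treating -ENOSR as "progress was made, map again" rather than a hard failure:

        struct xfs_bmbt_irec    imap;
        int                     nmap, error;

        do {
                nmap = 1;
                error = xfs_bmapi_write(tp, ip, bno, len, flags, total,
                                &imap, &nmap);
                /* -ENOSR: blocks were allocated, all before bno; retry */
        } while (error == -ENOSR);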
+ */ + bma.offset = bma.got.br_startoff; + bma.length = bma.got.br_blockcount; + + /* * When we're converting the delalloc reservations backing dirty pages * in the page cache, we must be careful about how we create the new * extents: @@ -4693,25 +4457,18 @@ xfs_bmapi_convert_delalloc( if (error) goto out_finish; - error = -ENOSPC; - if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) - goto out_finish; - if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) { - xfs_bmap_mark_sick(ip, whichfork); - error = -EFSCORRUPTED; - goto out_finish; - } - XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); ASSERT(!isnullstartblock(bma.got.br_startblock)); xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, xfs_iomap_inode_sequence(ip, flags)); - *seq = READ_ONCE(ifp->if_seq); + if (seq) + *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) - xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length); + xfs_refcount_alloc_cow_extent(tp, XFS_IS_REALTIME_INODE(ip), + bma.blkno, bma.length); error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, whichfork); @@ -4731,6 +4488,36 @@ out_trans_cancel: return error; } +/* + * Pass in a dellalloc extent and convert it to real extents, return the real + * extent that maps offset_fsb in iomap. + */ +int +xfs_bmapi_convert_delalloc( + struct xfs_inode *ip, + int whichfork, + loff_t offset, + struct iomap *iomap, + unsigned int *seq) +{ + int error; + + /* + * Attempt to allocate whatever delalloc extent currently backs offset + * and put the result into iomap. Allocate in a loop because it may + * take several attempts to allocate real blocks for a contiguous + * delalloc extent if free space is sufficiently fragmented. + */ + do { + error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset, + iomap, seq); + if (error) + return error; + } while (iomap->offset + iomap->length <= offset); + + return 0; +} + int xfs_bmapi_remap( struct xfs_trans *tp, @@ -4777,6 +4564,7 @@ xfs_bmapi_remap( } ip->i_nblocks += len; + ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (ifp->if_format == XFS_DINODE_FMT_BTREE) @@ -4822,32 +4610,18 @@ error0: * ores == 1). The number of stolen blocks is returned. The availability and * subsequent accounting of stolen blocks is the responsibility of the caller. */ -static xfs_filblks_t +static void xfs_bmap_split_indlen( xfs_filblks_t ores, /* original res. */ xfs_filblks_t *indlen1, /* ext1 worst indlen */ - xfs_filblks_t *indlen2, /* ext2 worst indlen */ - xfs_filblks_t avail) /* stealable blocks */ + xfs_filblks_t *indlen2) /* ext2 worst indlen */ { xfs_filblks_t len1 = *indlen1; xfs_filblks_t len2 = *indlen2; xfs_filblks_t nres = len1 + len2; /* new total res. */ - xfs_filblks_t stolen = 0; xfs_filblks_t resfactor; /* - * Steal as many blocks as we can to try and satisfy the worst case - * indlen for both new extents. - */ - if (ores < nres && avail) - stolen = XFS_FILBLKS_MIN(nres - ores, avail); - ores += stolen; - - /* nothing else to do if we've satisfied the new reservation */ - if (ores >= nres) - return stolen; - - /* * We can't meet the total required reservation for the two extents. * Calculate the percent of the overall shortage between both extents * and apply this percentage to each of the requested indlen values. 
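The proportional trim described above, as a standalone sketch (simplified from the resfactor arithmetic in the unchanged remainder of xfs_bmap_split_indlen(); integer division, with the rounding remainder handed back to the first extent):

static void
split_indlen_sketch(
        unsigned long long      ores,   /* old (available) reservation */
        unsigned long long      *len1,  /* worst-case indlen, extent 1 */
        unsigned long long      *len2)  /* worst-case indlen, extent 2 */
{
        unsigned long long      nres = *len1 + *len2;

        if (ores >= nres)
                return;         /* both worst cases already fit */

        /* scale each share by ores/nres, then give ext1 the remainder */
        *len1 = *len1 * ores / nres;
        *len2 = *len2 * ores / nres;
        *len1 += ores - (*len1 + *len2);
}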
@@ -4891,26 +4665,25 @@ xfs_bmap_split_indlen( *indlen1 = len1; *indlen2 = len2; - - return stolen; } -int +void xfs_bmap_del_extent_delay( struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *del) + struct xfs_bmbt_irec *del, + uint32_t bflags) /* bmapi flags */ { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_bmbt_irec new; int64_t da_old, da_new, da_diff = 0; xfs_fileoff_t del_endoff, got_endoff; - xfs_filblks_t got_indlen, new_indlen, stolen; + xfs_filblks_t got_indlen, new_indlen, stolen = 0; uint32_t state = xfs_bmap_fork_to_state(whichfork); - int error = 0; + uint64_t fdblocks; bool isrt; XFS_STATS_INC(mp, xs_del_exlist); @@ -4925,18 +4698,12 @@ xfs_bmap_del_extent_delay( ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); - if (isrt) - xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount)); - /* * Update the inode delalloc counter now and wait to update the * sb counters as we might have to borrow some blocks for the * indirect block accounting. */ - ASSERT(!isrt); - error = xfs_quota_unreserve_blkres(ip, del->br_blockcount); - if (error) - return error; + xfs_quota_unreserve_blkres(ip, del->br_blockcount); ip->i_delayed_blks -= del->br_blockcount; if (got->br_startoff == del->br_startoff) @@ -4990,8 +4757,24 @@ xfs_bmap_del_extent_delay( new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); WARN_ON_ONCE(!got_indlen || !new_indlen); - stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, - del->br_blockcount); + /* + * Steal as many blocks as we can to try and satisfy the worst + * case indlen for both new extents. + * + * However, we can't just steal reservations from the data + * blocks if this is an RT inodes as the data and metadata + * blocks come from different pools. We'll have to live with + * under-filled indirect reservation in this case. + */ + da_new = got_indlen + new_indlen; + if (da_new > da_old && !isrt) { + stolen = XFS_FILBLKS_MIN(da_new - da_old, + del->br_blockcount); + da_old += stolen; + } + if (da_new > da_old) + xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen); + da_new = got_indlen + new_indlen; got->br_startblock = nullstartblock((int)got_indlen); @@ -5003,20 +4786,29 @@ xfs_bmap_del_extent_delay( xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, &new, state); - da_new = got_indlen + new_indlen - stolen; del->br_blockcount -= stolen; break; } ASSERT(da_old >= da_new); da_diff = da_old - da_new; - if (!isrt) - da_diff += del->br_blockcount; - if (da_diff) { - xfs_mod_fdblocks(mp, da_diff, false); - xfs_mod_delalloc(mp, -da_diff); + fdblocks = da_diff; + + if (bflags & XFS_BMAPI_REMAP) { + ; + } else if (isrt) { + xfs_rtbxlen_t rtxlen; + + rtxlen = xfs_blen_to_rtbxlen(mp, del->br_blockcount); + if (xfs_is_zoned_inode(ip)) + xfs_zoned_add_available(mp, rtxlen); + xfs_add_frextents(mp, rtxlen); + } else { + fdblocks += del->br_blockcount; } - return error; + + xfs_add_fdblocks(mp, fdblocks); + xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff); } void @@ -5090,6 +4882,34 @@ xfs_bmap_del_extent_cow( ip->i_delayed_blks -= del->br_blockcount; } +static int +xfs_bmap_free_rtblocks( + struct xfs_trans *tp, + struct xfs_bmbt_irec *del) +{ + struct xfs_rtgroup *rtg; + int error; + + rtg = xfs_rtgroup_grab(tp->t_mountp, 0); + if (!rtg) + return -EIO; + + /* + * Ensure the bitmap and summary inodes are locked and joined to the + * transaction before modifying them. 
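The accounting rework in xfs_bmap_del_extent_delay() above reduces to one rule: freed indirect-reservation blocks always return to the data device's fdblocks pool, while the extent's own blocks return to the free rt extent counter for realtime inodes and to fdblocks otherwise. In miniature (REMAP and zoned cases omitted):

static void
unreserve_delalloc_sketch(
        struct xfs_mount        *mp,
        bool                    isrt,
        xfs_filblks_t           del_blocks,     /* extent being removed */
        xfs_filblks_t           da_diff)        /* freed indirect res. */
{
        uint64_t                fdblocks = da_diff;

        if (isrt)
                xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del_blocks));
        else
                fdblocks += del_blocks;

        xfs_add_fdblocks(mp, fdblocks);
}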
+ */ + if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { + tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); + xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP); + } + + error = xfs_rtfree_blocks(tp, rtg, del->br_startblock, + del->br_blockcount); + xfs_rtgroup_rele(rtg); + return error; +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space. @@ -5107,8 +4927,7 @@ xfs_bmap_del_extent_real( { xfs_fsblock_t del_endblock=0; /* first block past del */ xfs_fileoff_t del_endoff; /* first offset past del */ - int do_fx; /* free extent at end of routine */ - int error; /* error return value */ + int error = 0; /* error return value */ struct xfs_bmbt_irec got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ @@ -5151,20 +4970,10 @@ xfs_bmap_del_extent_real( return -ENOSPC; *logflagsp = XFS_ILOG_CORE; - if (xfs_ifork_is_realtime(ip, whichfork)) { - if (!(bflags & XFS_BMAPI_REMAP)) { - error = xfs_rtfree_blocks(tp, del->br_startblock, - del->br_blockcount); - if (error) - return error; - } - - do_fx = 0; + if (xfs_ifork_is_realtime(ip, whichfork)) qfield = XFS_TRANS_DQ_RTBCOUNT; - } else { - do_fx = 1; + else qfield = XFS_TRANS_DQ_BCOUNT; - } nblks = del->br_blockcount; del_endblock = del->br_startblock + del->br_blockcount; @@ -5312,18 +5121,39 @@ xfs_bmap_del_extent_real( /* * If we need to, add to list of extents to delete. */ - if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { + if (!(bflags & XFS_BMAPI_REMAP)) { + bool isrt = xfs_ifork_is_realtime(ip, whichfork); + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { - xfs_refcount_decrease_extent(tp, del); + xfs_refcount_decrease_extent(tp, isrt, del); + } else if (isrt && !xfs_has_rtgroups(mp)) { + error = xfs_bmap_free_rtblocks(tp, del); } else { + unsigned int efi_flags = 0; + + if ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN) + efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; + + /* + * Historically, we did not use EFIs to free realtime + * extents. However, when reverse mapping is enabled, + * we must maintain the same order of operations as the + * data device, which is: Remove the file mapping, + * remove the reverse mapping, and then free the + * blocks. Reflink for realtime volumes requires the + * same sort of ordering. Both features rely on + * rtgroups, so let's gate rt EFI usage on rtgroups. + */ + if (isrt) + efi_flags |= XFS_FREE_EXTENT_REALTIME; + error = xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - XFS_AG_RESV_NONE, - ((bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN)); - if (error) - return error; + XFS_AG_RESV_NONE, efi_flags); } + if (error) + return error; } /* @@ -5414,16 +5244,6 @@ __xfs_bunmapi( } else cur = NULL; - if (isrt) { - /* - * Synchronize by locking the bitmap inode. 
- */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); - xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); - } - extno = 0; while (end != (xfs_fileoff_t)-1 && end >= start && (nexts == 0 || extno < nexts)) { @@ -5584,18 +5404,17 @@ __xfs_bunmapi( delete: if (wasdel) { - error = xfs_bmap_del_extent_delay(ip, whichfork, &icur, - &got, &del); + xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, + &del, flags); } else { error = xfs_bmap_del_extent_real(ip, tp, &icur, cur, &del, &tmp_logflags, whichfork, flags); logflags |= tmp_logflags; + if (error) + goto error0; } - if (error) - goto error0; - end = del.br_startoff - 1; nodelete: /* @@ -5678,6 +5497,8 @@ xfs_bunmapi( */ STATIC bool xfs_bmse_can_merge( + struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *left, /* preceding extent */ struct xfs_bmbt_irec *got, /* current extent to shift */ xfs_fileoff_t shift) /* shift fsb */ @@ -5693,7 +5514,8 @@ xfs_bmse_can_merge( if ((left->br_startoff + left->br_blockcount != startoff) || (left->br_startblock + left->br_blockcount != got->br_startblock) || (left->br_state != got->br_state) || - (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN)) + (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) || + !xfs_bmap_same_rtgroup(ip, whichfork, left, got)) return false; return true; @@ -5729,7 +5551,7 @@ xfs_bmse_merge( blockcount = left->br_blockcount + got->br_blockcount; xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - ASSERT(xfs_bmse_can_merge(left, got, shift)); + ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift)); new = *left; new.br_blockcount = blockcount; @@ -5891,7 +5713,8 @@ xfs_bmap_collapse_extents( goto del_cursor; } - if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) { + if (xfs_bmse_can_merge(ip, whichfork, &prev, &got, + offset_shift_fsb)) { error = xfs_bmse_merge(tp, ip, whichfork, offset_shift_fsb, &icur, &got, &prev, cur, &logflags); @@ -6027,7 +5850,8 @@ xfs_bmap_insert_extents( * never find mergeable extents in this scenario. Check anyways * and warn if we encounter two extents that could be one. */ - if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb)) + if (xfs_bmse_can_merge(ip, whichfork, &got, &next, + offset_shift_fsb)) WARN_ON_ONCE(1); } @@ -6354,6 +6178,7 @@ xfs_bunmapi_range( error = xfs_defer_finish(tpp); if (error) goto out; + cond_resched(); } out: return error; @@ -6401,3 +6226,50 @@ xfs_bmap_query_all( return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query); } + +/* Helper function to extract extent size hint from inode */ +xfs_extlen_t +xfs_get_extsz_hint( + struct xfs_inode *ip) +{ + /* + * No point in aligning allocations if we need to COW to actually + * write to them. + */ + if (!xfs_is_always_cow_inode(ip) && + (ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) + return ip->i_extsize; + if (XFS_IS_REALTIME_INODE(ip) && + ip->i_mount->m_sb.sb_rextsize > 1) + return ip->i_mount->m_sb.sb_rextsize; + return 0; +} + +/* + * Helper function to extract CoW extent size hint from inode. + * Between the extent size hint and the CoW extent size hint, we + * return the greater of the two. If the value is zero (automatic), + * use the default size. 
+ */
+xfs_extlen_t
+xfs_get_cowextsz_hint(
+	struct xfs_inode	*ip)
+{
+	xfs_extlen_t		a, b;
+
+	a = 0;
+	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+		a = ip->i_cowextsize;
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		b = 0;
+		if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
+			b = ip->i_extsize;
+	} else {
+		b = xfs_get_extsz_hint(ip);
+	}
+
+	a = max(a, b);
+	if (a == 0)
+		return XFS_DEFAULT_COWEXTSZ_HINT;
+	return a;
+}
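A hypothetical caller of the helper above, relying on the guarantee that the returned hint is never zero:

static xfs_extlen_t
cow_alloc_len(
        struct xfs_inode        *ip,
        xfs_extlen_t            want)
{
        /* the hint is never zero, so the roundup() is always safe */
        return roundup(want, xfs_get_cowextsz_hint(ip));
}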