diff options
Diffstat (limited to 'fs/xfs/libxfs')
56 files changed, 5065 insertions, 4320 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index df3e600835e8..03885a968de8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -27,6 +27,7 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_alloc.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" @@ -94,13 +95,13 @@ xfs_ag_resv_critical( switch (type) { case XFS_AG_RESV_METADATA: - avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved; + avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved; orig = pag->pag_meta_resv.ar_asked; break; - case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_RMAPBT: avail = pag->pagf_freeblks + pag->pagf_flcount - pag->pag_meta_resv.ar_reserved; - orig = pag->pag_agfl_resv.ar_asked; + orig = pag->pag_rmapbt_resv.ar_asked; break; default: ASSERT(0); @@ -125,10 +126,10 @@ xfs_ag_resv_needed( { xfs_extlen_t len; - len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; + len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved; switch (type) { case XFS_AG_RESV_METADATA: - case XFS_AG_RESV_AGFL: + case XFS_AG_RESV_RMAPBT: len -= xfs_perag_resv(pag, type)->ar_reserved; break; case XFS_AG_RESV_NONE: @@ -159,10 +160,11 @@ __xfs_ag_resv_free( if (pag->pag_agno == 0) pag->pag_mount->m_ag_max_usable += resv->ar_asked; /* - * AGFL blocks are always considered "free", so whatever - * was reserved at mount time must be given back at umount. + * RMAPBT blocks come from the AGFL and AGFL blocks are always + * considered "free", so whatever was reserved at mount time must be + * given back at umount. */ - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) oldresv = resv->ar_orig_reserved; else oldresv = resv->ar_reserved; @@ -184,7 +186,7 @@ xfs_ag_resv_free( int error; int err2; - error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); + error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT); err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); if (err2 && !error) error = err2; @@ -283,15 +285,15 @@ xfs_ag_resv_init( } } - /* Create the AGFL metadata reservation */ - if (pag->pag_agfl_resv.ar_asked == 0) { + /* Create the RMAPBT metadata reservation */ + if (pag->pag_rmapbt_resv.ar_asked == 0) { ask = used = 0; error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used); if (error) goto out; } @@ -303,7 +305,7 @@ xfs_ag_resv_init( return error; ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + - xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount); #endif out: @@ -324,8 +326,10 @@ xfs_ag_resv_alloc_extent( trace_xfs_ag_resv_alloc_extent(pag, type, args->len); switch (type) { - case XFS_AG_RESV_METADATA: case XFS_AG_RESV_AGFL: + return; + case XFS_AG_RESV_METADATA: + case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); break; default: @@ -340,7 +344,7 @@ xfs_ag_resv_alloc_extent( len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); resv->ar_reserved -= len; - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) return; /* Allocations of reserved blocks only need on-disk sb updates... */ xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); @@ -364,8 +368,10 @@ xfs_ag_resv_free_extent( trace_xfs_ag_resv_free_extent(pag, type, len); switch (type) { - case XFS_AG_RESV_METADATA: case XFS_AG_RESV_AGFL: + return; + case XFS_AG_RESV_METADATA: + case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); break; default: @@ -378,7 +384,7 @@ xfs_ag_resv_free_extent( leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); resv->ar_reserved += leftover; - if (type == XFS_AG_RESV_AGFL) + if (type == XFS_AG_RESV_RMAPBT) return; /* Freeing into the reserved pool only requires on-disk update... */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index 8d6c687deef3..938f2f96c5e8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, struct xfs_trans *tp, xfs_extlen_t len); +/* + * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from + * the AGFL, they are allocated one at a time and the reservation updates don't + * require a transaction. + */ +static inline void +xfs_ag_resv_rmapbt_alloc( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_alloc_arg args = {0}; + struct xfs_perag *pag; + + args.len = 1; + pag = xfs_perag_get(mp, agno); + xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args); + xfs_perag_put(pag); +} + +static inline void +xfs_ag_resv_rmapbt_free( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); + xfs_perag_put(pag); +} + #endif /* __XFS_AG_RESV_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 744dcaec34cc..39387bdd225d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -31,6 +31,7 @@ #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_trace.h" @@ -52,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +/* + * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in + * the beginning of the block for a proper header with the location information + * and CRC. + */ +unsigned int +xfs_agfl_size( + struct xfs_mount *mp) +{ + unsigned int size = mp->m_sb.sb_sectsize; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + size -= sizeof(struct xfs_agfl); + + return size / sizeof(xfs_agblock_t); +} + unsigned int xfs_refc_block( struct xfs_mount *mp) @@ -166,7 +184,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. */ -static int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -519,7 +537,7 @@ xfs_alloc_fixup_trees( return 0; } -static bool +static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { @@ -527,10 +545,19 @@ xfs_agfl_verify( struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; + /* + * There is no verification of non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return NULL; + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -538,16 +565,17 @@ xfs_agfl_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; - for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + for (i = 0; i < xfs_agfl_size(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) - return false; + return __this_address; } - return xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) + return __this_address; + return NULL; } static void @@ -555,6 +583,7 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -566,28 +595,29 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agfl_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agfl_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -601,6 +631,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { .name = "xfs_agfl", .verify_read = xfs_agfl_read_verify, .verify_write = xfs_agfl_write_verify, + .verify_struct = xfs_agfl_verify, }; /* @@ -701,7 +732,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1550,7 +1581,6 @@ xfs_alloc_ag_vextent_small( int *stat) /* status: 0-freelist, 1-normal/none */ { struct xfs_owner_info oinfo; - struct xfs_perag *pag; int error; xfs_agblock_t fbno; xfs_extlen_t flen; @@ -1584,6 +1614,10 @@ xfs_alloc_ag_vextent_small( bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto error0; + } xfs_trans_binval(args->tp, bp); } args->len = 1; @@ -1598,18 +1632,13 @@ xfs_alloc_ag_vextent_small( /* * If we're feeding an AGFL block to something that * doesn't live in the free space, we need to clear - * out the OWN_AG rmap and add the block back to - * the AGFL per-AG reservation. + * out the OWN_AG rmap. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, &oinfo); if (error) goto error0; - pag = xfs_perag_get(args->mp, args->agno); - xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL, - args->tp, 1); - xfs_perag_put(pag); *stat = 0; return 0; @@ -1677,7 +1706,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; @@ -1893,14 +1922,12 @@ xfs_free_ag_extent( XFS_STATS_INC(mp, xs_freex); XFS_STATS_ADD(mp, xs_freeb, len); - trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, - haveleft, haveright); + trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); return 0; error0: - trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, - -1, -1); + trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) @@ -2036,6 +2063,93 @@ xfs_alloc_space_available( } /* + * Check the agfl fields of the agf for inconsistency or corruption. The purpose + * is to detect an agfl header padding mismatch between current and early v5 + * kernels. This problem manifests as a 1-slot size difference between the + * on-disk flcount and the active [first, last] range of a wrapped agfl. This + * may also catch variants of agfl count corruption unrelated to padding. Either + * way, we'll reset the agfl and warn the user. + * + * Return true if a reset is required before the agfl can be used, false + * otherwise. + */ +static bool +xfs_agfl_needs_reset( + struct xfs_mount *mp, + struct xfs_agf *agf) +{ + uint32_t f = be32_to_cpu(agf->agf_flfirst); + uint32_t l = be32_to_cpu(agf->agf_fllast); + uint32_t c = be32_to_cpu(agf->agf_flcount); + int agfl_size = xfs_agfl_size(mp); + int active; + + /* no agfl header on v4 supers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + + /* + * The agf read verifier catches severe corruption of these fields. + * Repeat some sanity checks to cover a packed -> unpacked mismatch if + * the verifier allows it. + */ + if (f >= agfl_size || l >= agfl_size) + return true; + if (c > agfl_size) + return true; + + /* + * Check consistency between the on-disk count and the active range. An + * agfl padding mismatch manifests as an inconsistent flcount. + */ + if (c && l >= f) + active = l - f + 1; + else if (c) + active = agfl_size - f + l + 1; + else + active = 0; + + return active != c; +} + +/* + * Reset the agfl to an empty state. Ignore/drop any existing blocks since the + * agfl content cannot be trusted. Warn the user that a repair is required to + * recover leaked blocks. + * + * The purpose of this mechanism is to handle filesystems affected by the agfl + * header padding mismatch problem. A reset keeps the filesystem online with a + * relatively minor free space accounting inconsistency rather than suffer the + * inevitable crash from use of an invalid agfl block. + */ +static void +xfs_agfl_reset( + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_perag *pag) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + ASSERT(pag->pagf_agflreset); + trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); + + xfs_warn(mp, + "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " + "Please unmount and run xfs_repair.", + pag->pag_agno, pag->pagf_flcount); + + agf->agf_flfirst = 0; + agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); + agf->agf_flcount = 0; + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | + XFS_AGF_FLCOUNT); + + pag->pagf_flcount = 0; + pag->pagf_agflreset = false; +} + +/* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. */ @@ -2096,6 +2210,10 @@ xfs_alloc_fix_freelist( } } + /* reset a padding mismatched agfl before final free space check */ + if (pag->pagf_agflreset) + xfs_agfl_reset(tp, agbp, pag); + /* If there isn't enough total space or single-extent, reject it. */ need = xfs_alloc_min_freelist(mp, pag); if (!xfs_alloc_space_available(args, need, flags)) @@ -2141,6 +2259,10 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto out_agbp_relse; + } xfs_trans_binval(tp, bp); } @@ -2244,10 +2366,11 @@ xfs_alloc_get_freelist( bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); - if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; @@ -2355,10 +2478,11 @@ xfs_alloc_put_freelist( be32_to_cpu(agf->agf_seqno), &agflbp))) return error; be32_add_cpu(&agf->agf_fllast, 1); - if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2373,7 +2497,7 @@ xfs_alloc_put_freelist( xfs_alloc_log_agf(tp, agbp, logflags); - ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); + ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; @@ -2388,39 +2512,39 @@ xfs_alloc_put_freelist( return 0; } -static bool +static xfs_failaddr_t xfs_agf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) - { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) - return false; + return __this_address; } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) - return false; + be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) + return __this_address; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2429,18 +2553,18 @@ xfs_agf_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; if (xfs_sb_version_haslazysbcount(&mp->m_sb) && be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return false; + return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; - return true;; + return NULL; } @@ -2449,28 +2573,29 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agf_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2487,6 +2612,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = { .name = "xfs_agf", .verify_read = xfs_agf_read_verify, .verify_write = xfs_agf_write_verify, + .verify_struct = xfs_agf_verify, }; /* @@ -2564,6 +2690,7 @@ xfs_alloc_read_agf( pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; pag->pagf_init = 1; + pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); } #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { @@ -2923,3 +3050,71 @@ xfs_alloc_query_all( query.fn = fn; return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); } + +/* Find the size of the AG, in blocks. */ +xfs_agblock_t +xfs_ag_block_count( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + ASSERT(agno < mp->m_sb.sb_agcount); + + if (agno < mp->m_sb.sb_agcount - 1) + return mp->m_sb.sb_agblocks; + return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks); +} + +/* + * Verify that an AG block number pointer neither points outside the AG + * nor points at static metadata. + */ +bool +xfs_verify_agbno( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno) +{ + xfs_agblock_t eoag; + + eoag = xfs_ag_block_count(mp, agno); + if (agbno >= eoag) + return false; + if (agbno <= XFS_AGFL_BLOCK(mp)) + return false; + return true; +} + +/* + * Verify that an FS block number pointer neither points outside the + * filesystem nor points at static AG metadata. + */ +bool +xfs_verify_fsbno( + struct xfs_mount *mp, + xfs_fsblock_t fsbno) +{ + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); + + if (agno >= mp->m_sb.sb_agcount) + return false; + return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); +} + +/* Is there a record covering a given extent? */ +int +xfs_alloc_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.a.ar_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.a.ar_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index ef26edc2e938..a311a2414a6b 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -26,6 +26,8 @@ struct xfs_trans; extern struct workqueue_struct *xfs_alloc_wq; +unsigned int xfs_agfl_size(struct xfs_mount *mp); + /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ @@ -198,6 +200,13 @@ xfs_free_extent( enum xfs_ag_resv_type type); /* block reservation type */ int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -232,5 +241,12 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, void *priv); int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, void *priv); +xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); +bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno); +bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); + +int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exist); #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index cfde0a0f9706..b451649ba176 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -74,18 +74,13 @@ xfs_allocbt_alloc_block( int error; xfs_agblock_t bno; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* Allocate the new block from the freelist. If we can't, give up. */ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, &bno, 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } if (bno == NULLAGBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -95,7 +90,6 @@ xfs_allocbt_alloc_block( xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } @@ -307,13 +301,14 @@ xfs_cntbt_diff_two_keys( be32_to_cpu(k2->alloc.ar_startblock); } -static bool +static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -331,29 +326,31 @@ xfs_allocbt_verify( level = be16_to_cpu(block->bb_level); switch (block->bb_magic) { case cpu_to_be32(XFS_ABTB_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTB_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; case cpu_to_be32(XFS_ABTC_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTC_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; default: - return false; + return __this_address; } return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -363,25 +360,30 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_allocbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_allocbt_write_verify( struct xfs_buf *bp) { - if (!xfs_allocbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_allocbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -392,6 +394,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = { .name = "xfs_allocbt", .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, + .verify_struct = xfs_allocbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..ce4a34a2751d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -707,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -760,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return 0; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -1035,7 +1043,6 @@ out: return retval; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } @@ -1176,7 +1183,6 @@ out: return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 5c16db86b38f..2135b8e67dcc 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk( } } -static bool +static xfs_failaddr_t xfs_attr3_leaf_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_perag *pag = bp->b_pag; - struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; + struct xfs_attr_leaf_entry *entries; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -262,17 +263,17 @@ xfs_attr3_leaf_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) - return false; + return __this_address; } /* * In recovery there is a transient state where count == 0 is valid @@ -280,12 +281,27 @@ xfs_attr3_leaf_verify( * if the attr didn't fit in shortform. */ if (pag && pag->pagf_init && ichdr.count == 0) - return false; + return __this_address; + + /* + * firstused is the block offset of the first name info structure. + * Make sure it doesn't go off the block or crash into the header. + */ + if (ichdr.firstused > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) + return __this_address; + + /* Make sure the entries array doesn't crash into the name info. */ + entries = xfs_attr3_leaf_entryp(bp->b_addr); + if ((char *)&entries[ichdr.count] > + (char *)bp->b_addr + ichdr.firstused) + return __this_address; /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_leaf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_attr3_leaf_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { .name = "xfs_attr3_leaf", .verify_read = xfs_attr3_leaf_read_verify, .verify_write = xfs_attr3_leaf_write_verify, + .verify_struct = xfs_attr3_leaf_verify, }; int @@ -397,13 +416,9 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) /* rounded down */ offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; - switch (dp->i_d.di_format) { - case XFS_DINODE_FMT_DEV: + if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) { minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; return (offset >= minforkoff) ? minforkoff : 0; - case XFS_DINODE_FMT_UUID: - minforkoff = roundup(sizeof(uuid_t), 8) >> 3; - return (offset >= minforkoff) ? minforkoff : 0; } /* @@ -739,10 +754,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. */ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -822,7 +840,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; @@ -871,6 +889,80 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } +/* Verify the consistency of an inline attribute fork. */ +xfs_failaddr_t +xfs_attr_shortform_verify( + struct xfs_inode *ip) +{ + struct xfs_attr_shortform *sfp; + struct xfs_attr_sf_entry *sfep; + struct xfs_attr_sf_entry *next_sfep; + char *endp; + struct xfs_ifork *ifp; + int i; + int size; + + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the attribute is way too short. + */ + if (size < sizeof(struct xfs_attr_sf_hdr)) + return __this_address; + + endp = (char *)sfp + size; + + /* Check all reported entries */ + sfep = &sfp->list[0]; + for (i = 0; i < sfp->hdr.count; i++) { + /* + * struct xfs_attr_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return __this_address; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return __this_address; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + if ((char *)next_sfep > endp) + return __this_address; + + /* + * Check for unknown flags. Short form doesn't support + * the incomplete or local bits, so we can use the namespace + * mask here. + */ + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) + return __this_address; + + /* + * Check for invalid namespace combinations. We only allow + * one namespace flag per xattr, so we can just count the + * bits (i.e. hweight) here. + */ + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return __this_address; + + sfep = next_sfep; + } + if ((void *)sfep != (void *)endp) + return __this_address; + + return NULL; +} + /* * Convert a leaf attribute list to shortform attribute list */ @@ -2174,7 +2266,8 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); + if (ichdr.count >= args->geo->blksize / 8) + return -EFSCORRUPTED; /* * Binary search. (note: small blocks will skip this loop) @@ -2190,8 +2283,10 @@ xfs_attr3_leaf_lookup_int( else break; } - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) + return -EFSCORRUPTED; + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) + return -EFSCORRUPTED; /* * Since we may have duplicate hashval's, find the first matching diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..4da08af5b134 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,10 +48,12 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d56caf037ca0..21be186067a2 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks( * does CRC, location and bounds checking, the unpacking function checks the * attribute parameters and owner. */ -static bool +static xfs_failaddr_t xfs_attr3_rmt_hdr_ok( void *ptr, xfs_ino_t ino, @@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok( struct xfs_attr3_rmt_hdr *rmt = ptr; if (bno != be64_to_cpu(rmt->rm_blkno)) - return false; + return __this_address; if (offset != be32_to_cpu(rmt->rm_offset)) - return false; + return __this_address; if (size != be32_to_cpu(rmt->rm_bytes)) - return false; + return __this_address; if (ino != be64_to_cpu(rmt->rm_owner)) - return false; + return __this_address; /* ok */ - return true; + return NULL; } -static bool +static xfs_failaddr_t xfs_attr3_rmt_verify( struct xfs_mount *mp, void *ptr, @@ -98,27 +98,29 @@ xfs_attr3_rmt_verify( struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(rmt->rm_blkno) != bno) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_offset) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) - return false; + return __this_address; if (rmt->rm_owner == 0) - return false; + return __this_address; - return true; + return NULL; } -static void -xfs_attr3_rmt_read_verify( - struct xfs_buf *bp) +static int +__xfs_attr3_rmt_read_verify( + struct xfs_buf *bp, + bool check_crc, + xfs_failaddr_t *failaddr) { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; @@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify( /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; + return 0; ptr = bp->b_addr; bno = bp->b_bn; @@ -136,23 +138,48 @@ xfs_attr3_rmt_read_verify( ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); - break; - } - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; + if (check_crc && + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + *failaddr = __this_address; + return -EFSBADCRC; } + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (*failaddr) + return -EFSCORRUPTED; len -= blksize; ptr += blksize; bno += BTOBB(blksize); } - if (bp->b_error) - xfs_verifier_error(bp); - else - ASSERT(len == 0); + if (len != 0) { + *failaddr = __this_address; + return -EFSCORRUPTED; + } + + return 0; +} + +static void +xfs_attr3_rmt_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); + if (error) + xfs_verifier_error(bp, error, fa); +} + +static xfs_failaddr_t +xfs_attr3_rmt_verify_struct( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); + return error ? fa : NULL; } static void @@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; int len; @@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify( while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify( * xfs_attr3_rmt_hdr_set() for the explanation. */ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify( ptr += blksize; bno += BTOBB(blksize); } - ASSERT(len == 0); + + if (len != 0) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { .name = "xfs_attr3_rmt", .verify_read = xfs_attr3_rmt_read_verify, .verify_write = xfs_attr3_rmt_write_verify, + .verify_struct = xfs_attr3_rmt_verify_struct, }; STATIC int @@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout( byte_cnt = min(*valuelen, byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 044a363119be..3b03d886df66 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -38,6 +38,7 @@ #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rtalloc.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" @@ -112,28 +113,21 @@ xfs_bmap_compute_maxlevels( STATIC int /* error */ xfs_bmbt_lookup_eq( struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, + struct xfs_bmbt_irec *irec, int *stat) /* success/failure */ { - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; + cur->bc_rec.b = *irec; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } STATIC int /* error */ -xfs_bmbt_lookup_ge( +xfs_bmbt_lookup_first( struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, int *stat) /* success/failure */ { - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; + cur->bc_rec.b.br_startoff = 0; + cur->bc_rec.b.br_startblock = 0; + cur->bc_rec.b.br_blockcount = 0; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } @@ -160,21 +154,17 @@ static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) } /* - * Update the record referred to by cur to the value given - * by [off, bno, len, state]. + * Update the record referred to by cur to the value given by irec * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int xfs_bmbt_update( struct xfs_btree_cur *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - xfs_exntst_t state) + struct xfs_bmbt_irec *irec) { union xfs_btree_rec rec; - xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); + xfs_bmbt_disk_set_all(&rec.bmbt, irec); return xfs_btree_update(cur, &rec); } @@ -242,7 +232,6 @@ xfs_bmap_forkoff_reset( { if (whichfork == XFS_ATTR_FORK && ip->i_d.di_format != XFS_DINODE_FMT_DEV && - ip->i_d.di_format != XFS_DINODE_FMT_UUID && ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; @@ -411,7 +400,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_verify_fsbno(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -499,31 +488,6 @@ error_norelse: } /* - * Add bmap trace insert entries for all the contents of the extent records. - */ -void -xfs_bmap_trace_exlist( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr or cow fork */ - unsigned long caller_ip) -{ - xfs_extnum_t idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int state = 0; - - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - else if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == xfs_iext_count(ifp)); - for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, state, caller_ip); -} - -/* * Validate that the bmbt_irecs being returned from bmapi are valid * given the caller's original parameters. Specifically check the * ranges of the returned irecs to ensure that they only extend beyond @@ -657,8 +621,8 @@ xfs_bmap_btree_to_extents( cbno = be64_to_cpu(*pp); *logflagsp = 0; #ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, cbno, 1))) - return error; + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, + xfs_btree_check_lptr(cur, cbno, 1)); #endif error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); @@ -703,14 +667,14 @@ xfs_bmap_extents_to_btree( xfs_bmbt_rec_t *arp; /* child record pointer */ struct xfs_btree_block *block; /* btree root block */ xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ int error; /* error return value */ - xfs_extnum_t i, cnt; /* extent record index */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_key_t *kp; /* root block key pointer */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ xfs_bmbt_ptr_t *pp; /* root block address pointer */ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec rec; + xfs_extnum_t cnt = 0; mp = ip->i_mount; ASSERT(whichfork != XFS_COW_FORK); @@ -789,15 +753,12 @@ xfs_bmap_extents_to_btree( XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = xfs_iext_count(ifp); - for (cnt = i = 0; i < nextents; i++) { - ep = xfs_iext_get_ext(ifp, i); - if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { - arp->l0 = cpu_to_be64(ep->l0); - arp->l1 = cpu_to_be64(ep->l1); - arp++; cnt++; - } + for_each_xfs_iext(ifp, &icur, &rec) { + if (isnullstartblock(rec.br_startblock)) + continue; + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt); + xfs_bmbt_disk_set_all(arp, &rec); + cnt++; } ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); xfs_btree_set_numrecs(ablock, cnt); @@ -845,6 +806,8 @@ xfs_bmap_local_to_extents_empty( xfs_bmap_forkoff_reset(ip, whichfork); ifp->if_flags &= ~XFS_IFINLINE; ifp->if_flags |= XFS_IFEXTENTS; + ifp->if_u1.if_root = NULL; + ifp->if_height = 0; XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); } @@ -868,6 +831,7 @@ xfs_bmap_local_to_extents( xfs_alloc_arg_t args; /* allocation arguments */ xfs_buf_t *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; + struct xfs_iext_cursor icur; /* * We don't want to deal with the case of keeping inode data inline yet. @@ -885,8 +849,7 @@ xfs_bmap_local_to_extents( flags = 0; error = 0; - ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == - XFS_IFINLINE); + ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE); memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; @@ -930,15 +893,16 @@ xfs_bmap_local_to_extents( xfs_bmap_local_to_extents_empty(ip, whichfork); flags |= XFS_ILOG_CORE; + ifp->if_u1.if_root = NULL; + ifp->if_height = 0; + rec.br_startoff = 0; rec.br_startblock = args.fsbno; rec.br_blockcount = 1; rec.br_state = XFS_EXT_NORM; - xfs_iext_insert(ip, 0, 1, &rec, 0); + xfs_iext_first(ifp, &icur); + xfs_iext_insert(ip, &icur, &rec, 0); - trace_xfs_bmap_post_update(ip, 0, - whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, - _THIS_IP_); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; xfs_trans_mod_dquot_byino(tp, ip, @@ -973,7 +937,8 @@ xfs_bmap_add_attrfork_btree( cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); cur->bc_private.b.dfops = dfops; cur->bc_private.b.firstblock = *firstblock; - if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) + error = xfs_bmbt_lookup_first(cur, &stat); + if (error) goto error0; /* must be at least one entry */ XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); @@ -1124,9 +1089,6 @@ xfs_bmap_add_attrfork( case XFS_DINODE_FMT_DEV: ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; break; - case XFS_DINODE_FMT_UUID: - ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; - break; case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: @@ -1206,32 +1168,35 @@ trans_cancel: */ /* - * Read in the extents to if_extents. - * All inode fields are set up by caller, we just traverse the btree - * and copy the records in. If the file system cannot contain unwritten - * extents, the records are checked for no "state" flags. + * Read in extents from a btree-format inode. */ -int /* error */ -xfs_bmap_read_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ +int +xfs_iread_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork) { - struct xfs_btree_block *block; /* current btree block */ - xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ - int error; /* error return value */ - xfs_extnum_t i, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ - int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ - __be64 *pp; /* pointer to block address */ - /* REFERENCED */ - xfs_extnum_t room; /* number of entries there's room for */ + struct xfs_mount *mp = ip->i_mount; + int state = xfs_bmap_fork_to_state(whichfork); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_extnum_t nextents = XFS_IFORK_NEXTENTS(ip, whichfork); + struct xfs_btree_block *block = ifp->if_broot; + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec new; + xfs_fsblock_t bno; + struct xfs_buf *bp; + xfs_extnum_t i, j; + int level; + __be64 *pp; + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - block = ifp->if_broot; /* * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ @@ -1248,21 +1213,23 @@ xfs_bmap_read_extents( error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) - return error; + goto out; block = XFS_BUF_TO_BLOCK(bp); if (level == 0) break; pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_verify_fsbno(mp, bno), out_brelse); xfs_trans_brelse(tp, bp); } + /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = xfs_iext_count(ifp); i = 0; + xfs_iext_first(ifp, &icur); + /* * Loop over all leaf nodes. Copy information to the extent records. */ @@ -1272,14 +1239,16 @@ xfs_bmap_read_extents( xfs_extnum_t num_recs; num_recs = xfs_btree_get_numrecs(block); - if (unlikely(i + num_recs > room)) { - ASSERT(i + num_recs <= room); + if (unlikely(i + num_recs > nextents)) { + ASSERT(i + num_recs <= nextents); xfs_warn(ip->i_mount, "corrupt dinode %Lu, (btree extents).", (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", - XFS_ERRLEVEL_LOW, ip->i_mount, block); - goto error0; + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + __func__, block, sizeof(*block), + __this_address); + error = -EFSCORRUPTED; + goto out_brelse; } /* * Read-ahead the next leaf block, if any. @@ -1292,15 +1261,21 @@ xfs_bmap_read_extents( * Copy records into the extent records. */ frp = XFS_BMBT_REC_ADDR(mp, block, 1); - for (j = 0; j < num_recs; j++, i++, frp++) { - xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); - trp->l0 = be64_to_cpu(frp->l0); - trp->l1 = be64_to_cpu(frp->l1); - if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) { - XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, mp); - goto error0; + for (j = 0; j < num_recs; j++, frp++, i++) { + xfs_failaddr_t fa; + + xfs_bmbt_disk_get_all(frp, &new); + fa = xfs_bmap_validate_extent(ip, whichfork, &new); + if (fa) { + error = -EFSCORRUPTED; + xfs_inode_verifier_error(ip, error, + "xfs_iread_extents(2)", + frp, sizeof(*frp), fa); + goto out_brelse; } + xfs_iext_insert(ip, &icur, &new, state); + trace_xfs_read_extent(ip, &icur, state, _THIS_IP_); + xfs_iext_next(ifp, &icur); } xfs_trans_brelse(tp, bp); bno = nextbno; @@ -1312,71 +1287,74 @@ xfs_bmap_read_extents( error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) - return error; + goto out; block = XFS_BUF_TO_BLOCK(bp); } - if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) - return -EFSCORRUPTED; + + if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) { + error = -EFSCORRUPTED; + goto out; + } ASSERT(i == xfs_iext_count(ifp)); - XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); + + ifp->if_flags |= XFS_IFEXTENTS; return 0; -error0: + +out_brelse: xfs_trans_brelse(tp, bp); - return -EFSCORRUPTED; +out: + xfs_iext_destroy(ifp); + return error; } /* - * Returns the file-relative block number of the first unused block(s) - * in the file with at least "len" logically contiguous blocks free. - * This is the lowest-address hole if the file has holes, else the first block - * past the end of file. - * Return 0 if the file is currently local (in-inode). + * Returns the relative block number of the first unused block(s) in the given + * fork with at least "len" logically contiguous blocks free. This is the + * lowest-address hole if the fork has holes, else the first block past the end + * of fork. Return 0 if the fork is currently local (in-inode). */ int /* error */ xfs_bmap_first_unused( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_extlen_t len, /* size of hole to find */ - xfs_fileoff_t *first_unused, /* unused block */ - int whichfork) /* data or attr fork */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_extlen_t len, /* size of hole to find */ + xfs_fileoff_t *first_unused, /* unused block */ + int whichfork) /* data or attr fork */ { - int error; /* error return value */ - int idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_fileoff_t lastaddr; /* last block number seen */ - xfs_fileoff_t lowest; /* lowest useful block */ - xfs_fileoff_t max; /* starting useful block */ - xfs_extnum_t nextents; /* number of extent entries */ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec got; + struct xfs_iext_cursor icur; + xfs_fileoff_t lastaddr = 0; + xfs_fileoff_t lowest, max; + int error; ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { *first_unused = 0; return 0; } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - lowest = *first_unused; - nextents = xfs_iext_count(ifp); - for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { - struct xfs_bmbt_irec got; - xfs_iext_get_extent(ifp, idx, &got); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + lowest = max = *first_unused; + for_each_xfs_iext(ifp, &icur, &got) { /* * See if the hole before this extent will work. */ if (got.br_startoff >= lowest + len && - got.br_startoff - max >= len) { - *first_unused = max; - return 0; - } + got.br_startoff - max >= len) + break; lastaddr = got.br_startoff + got.br_blockcount; max = XFS_FILEOFF_MAX(lastaddr, lowest); } + *first_unused = max; return 0; } @@ -1396,7 +1374,7 @@ xfs_bmap_last_before( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_bmbt_irec got; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; int error; switch (XFS_IFORK_FORMAT(ip, whichfork)) { @@ -1416,17 +1394,8 @@ xfs_bmap_last_before( return error; } - if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) { - if (got.br_startoff <= *last_block - 1) - return 0; - } - - if (xfs_iext_get_extent(ifp, idx - 1, &got)) { - *last_block = got.br_startoff + got.br_blockcount; - return 0; - } - - *last_block = 0; + if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got)) + *last_block = 0; return 0; } @@ -1439,8 +1408,8 @@ xfs_bmap_last_extent( int *is_empty) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_iext_cursor icur; int error; - int nextents; if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(tp, ip, whichfork); @@ -1448,14 +1417,11 @@ xfs_bmap_last_extent( return error; } - nextents = xfs_iext_count(ifp); - if (nextents == 0) { + xfs_iext_last(ifp, &icur); + if (!xfs_iext_get_extent(ifp, &icur, rec)) *is_empty = 1; - return 0; - } - - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); - *is_empty = 0; + else + *is_empty = 0; return 0; } @@ -1477,14 +1443,14 @@ xfs_bmap_isaeof( int is_empty; int error; - bma->aeof = 0; + bma->aeof = false; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); if (error) return error; if (is_empty) { - bma->aeof = 1; + bma->aeof = true; return 0; } @@ -1540,10 +1506,10 @@ xfs_bmap_one_block( xfs_inode_t *ip, /* incore inode */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ xfs_ifork_t *ifp; /* inode fork pointer */ int rval; /* return value */ xfs_bmbt_irec_t s; /* internal version of extent */ + struct xfs_iext_cursor icur; #ifndef DEBUG if (whichfork == XFS_DATA_FORK) @@ -1555,8 +1521,8 @@ xfs_bmap_one_block( return 0; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_get_all(ep, &s); + xfs_iext_first(ifp, &icur); + xfs_iext_get_extent(ifp, &icur, &s); rval = s.br_startoff == 0 && s.br_blockcount == 1; if (rval && whichfork == XFS_DATA_FORK) ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); @@ -1576,8 +1542,6 @@ xfs_bmap_add_extent_delay_real( int whichfork) { struct xfs_bmbt_irec *new = &bma->got; - int diff; /* temp value */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ int error; /* error return value */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -1585,14 +1549,14 @@ xfs_bmap_add_extent_delay_real( xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ + int state = xfs_bmap_fork_to_state(whichfork); xfs_filblks_t da_new; /* new count del alloc blocks used */ xfs_filblks_t da_old; /* old count del alloc blocks used */ xfs_filblks_t temp=0; /* value for da_new calculations */ - xfs_filblks_t temp2=0;/* value for da_new calculations */ int tmp_rval; /* partial logging flags */ struct xfs_mount *mp; xfs_extnum_t *nextents; + struct xfs_bmbt_irec old; mp = bma->ip->i_mount; ifp = XFS_IFORK_PTR(bma->ip, whichfork); @@ -1600,8 +1564,6 @@ xfs_bmap_add_extent_delay_real( nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents : &bma->ip->i_d.di_nextents); - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -1612,15 +1574,12 @@ xfs_bmap_add_extent_delay_real( #define RIGHT r[1] #define PREV r[2] - if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - /* * Set up a bunch of variables to make the tests simpler. */ - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_get_all(ep, &PREV); + xfs_iext_get_extent(ifp, &bma->icur, &PREV); new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(isnullstartblock(PREV.br_startblock)); ASSERT(PREV.br_startoff <= new->br_startoff); ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); @@ -1640,10 +1599,8 @@ xfs_bmap_add_extent_delay_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (bma->idx > 0) { + if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); - if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -1660,10 +1617,8 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (bma->idx < xfs_iext_count(ifp) - 1) { + if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); - if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1693,22 +1648,19 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. */ - bma->idx--; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); + LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; + + xfs_iext_remove(bma->ip, &bma->icur, state); + xfs_iext_remove(bma->ip, &bma->icur, state); + xfs_iext_prev(ifp, &bma->icur); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); (*nextents)--; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); + error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -1720,11 +1672,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state); + error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; } @@ -1735,28 +1683,22 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. */ - bma->idx--; + old = LEFT; + LEFT.br_blockcount += PREV.br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + xfs_iext_remove(bma->ip, &bma->icur, state); + xfs_iext_prev(ifp, &bma->icur); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - PREV.br_blockcount, LEFT.br_state); + error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; } @@ -1767,27 +1709,23 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + PREV.br_startblock = new->br_startblock; + PREV.br_blockcount += RIGHT.br_blockcount; + + xfs_iext_next(ifp, &bma->icur); + xfs_iext_remove(bma->ip, &bma->icur, state); + xfs_iext_prev(ifp, &bma->icur); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); + error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, PREV.br_startoff, - new->br_startblock, - PREV.br_blockcount + - RIGHT.br_blockcount, PREV.br_state); + error = xfs_bmbt_update(bma->cur, &PREV); if (error) goto done; } @@ -1799,23 +1737,19 @@ xfs_bmap_add_extent_delay_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, new->br_startblock); - xfs_bmbt_set_state(ep, new->br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + PREV.br_startblock = new->br_startblock; + PREV.br_state = new->br_state; + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; @@ -1828,40 +1762,33 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); - + old = LEFT; temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), + startblockval(PREV.br_startblock)); + + LEFT.br_blockcount += new->br_blockcount; + + PREV.br_blockcount = temp; + PREV.br_startoff += new->br_blockcount; + PREV.br_startblock = nullstartblock(da_new); + + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); + xfs_iext_prev(ifp, &bma->icur); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, LEFT.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + - new->br_blockcount, - LEFT.br_state); + error = xfs_bmbt_update(bma->cur, &LEFT); if (error) goto done; } - da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), - startblockval(PREV.br_startblock)); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - - bma->idx--; break; case BMAP_LEFT_FILLING: @@ -1869,23 +1796,16 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, new_endoff); - temp = PREV.br_blockcount - new->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); + xfs_iext_update_extent(bma->ip, state, &bma->icur, new); (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; @@ -1900,12 +1820,18 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + + temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx + 1); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + + PREV.br_startoff = new_endoff; + PREV.br_blockcount = temp; + PREV.br_startblock = nullstartblock(da_new); + xfs_iext_next(ifp, &bma->icur); + xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); + xfs_iext_prev(ifp, &bma->icur); break; case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: @@ -1913,40 +1839,34 @@ xfs_bmap_add_extent_delay_real( * Filling in the last part of a previous delayed allocation. * The right neighbor is contiguous with the new allocation. */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - RIGHT.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); + old = RIGHT; + RIGHT.br_startoff = new->br_startoff; + RIGHT.br_startblock = new->br_startblock; + RIGHT.br_blockcount += new->br_blockcount; + if (bma->cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i); + error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - RIGHT.br_blockcount, - RIGHT.br_state); + error = xfs_bmbt_update(bma->cur, &RIGHT); if (error) goto done; } + temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock)); - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - bma->idx++; + PREV.br_blockcount = temp; + PREV.br_startblock = nullstartblock(da_new); + + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); + xfs_iext_next(ifp, &bma->icur); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT); break; case BMAP_RIGHT_FILLING: @@ -1954,22 +1874,16 @@ xfs_bmap_add_extent_delay_real( * Filling in the last part of a previous delayed allocation. * The right neighbor is not contiguous. */ - temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); + xfs_iext_update_extent(bma->ip, state, &bma->icur, new); (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; @@ -1984,14 +1898,16 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } + + temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - bma->idx++; + PREV.br_startblock = nullstartblock(da_new); + PREV.br_blockcount = temp; + xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); + xfs_iext_next(ifp, &bma->icur); break; case 0: @@ -2015,30 +1931,40 @@ xfs_bmap_add_extent_delay_real( * PREV @ idx LEFT RIGHT * inserted at idx + 1 */ - temp = new->br_startoff - PREV.br_startoff; - temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ + old = PREV; + + /* LEFT is the new middle */ LEFT = *new; + + /* RIGHT is the new right */ RIGHT.br_state = PREV.br_state; - RIGHT.br_startblock = nullstartblock( - (int)xfs_bmap_worst_indlen(bma->ip, temp2)); RIGHT.br_startoff = new_endoff; - RIGHT.br_blockcount = temp2; - /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ - xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); + RIGHT.br_blockcount = + PREV.br_startoff + PREV.br_blockcount - new_endoff; + RIGHT.br_startblock = + nullstartblock(xfs_bmap_worst_indlen(bma->ip, + RIGHT.br_blockcount)); + + /* truncate PREV */ + PREV.br_blockcount = new->br_startoff - PREV.br_startoff; + PREV.br_startblock = + nullstartblock(xfs_bmap_worst_indlen(bma->ip, + PREV.br_blockcount)); + xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); + + xfs_iext_next(ifp, &bma->icur); + xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); + xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); (*nextents)++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(bma->cur, new, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; error = xfs_btree_insert(bma->cur, &i); if (error) goto done; @@ -2053,30 +1979,9 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; } - temp = xfs_bmap_worst_indlen(bma->ip, temp); - temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - - (startblockval(PREV.br_startblock) - - (bma->cur ? - bma->cur->bc_private.b.allocated : 0))); - if (diff > 0) { - error = xfs_mod_fdblocks(bma->ip->i_mount, - -((int64_t)diff), false); - ASSERT(!error); - if (error) - goto done; - } - - ep = xfs_iext_get_ext(ifp, bma->idx); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), - nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); - bma->idx++; - da_new = temp + temp2; + da_new = startblockval(PREV.br_startblock) + + startblockval(RIGHT.br_startblock); break; case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: @@ -2110,19 +2015,17 @@ xfs_bmap_add_extent_delay_real( goto done; } - /* adjust for changes in reserved delayed indirect blocks */ - if (da_old || da_new) { - temp = da_new; - if (bma->cur) - temp += bma->cur->bc_private.b.allocated; - if (temp < da_old) - xfs_mod_fdblocks(bma->ip->i_mount, - (int64_t)(da_old - temp), false); + if (bma->cur) { + da_new += bma->cur->bc_private.b.allocated; + bma->cur->bc_private.b.allocated = 0; } - /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; + /* adjust for changes in reserved delayed indirect blocks */ + if (da_new != da_old) { + ASSERT(state == 0 || da_new < da_old); + error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), + false); + } xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); done: @@ -2142,7 +2045,7 @@ xfs_bmap_add_extent_unwritten_real( struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ int whichfork, - xfs_extnum_t *idx, /* extent number to update/insert */ + struct xfs_iext_cursor *icur, xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_fsblock_t *first, /* pointer to firstblock variable */ @@ -2150,28 +2053,22 @@ xfs_bmap_add_extent_unwritten_real( int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ - xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ int error; /* error return value */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ - xfs_exntst_t newext; /* new extent state */ - xfs_exntst_t oldext; /* old extent state */ xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ - int state = 0;/* state bits, accessed thru macros */ + int state = xfs_bmap_fork_to_state(whichfork); struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec old; *logflagsp = 0; cur = *curp; ifp = XFS_IFORK_PTR(ip, whichfork); - if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - ASSERT(*idx >= 0); - ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2184,12 +2081,8 @@ xfs_bmap_add_extent_unwritten_real( * Set up a bunch of variables to make the tests simpler. */ error = 0; - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &PREV); - newext = new->br_state; - oldext = (newext == XFS_EXT_UNWRITTEN) ? - XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - ASSERT(PREV.br_state == oldext); + xfs_iext_get_extent(ifp, icur, &PREV); + ASSERT(new->br_state != PREV.br_state); new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); @@ -2207,10 +2100,8 @@ xfs_bmap_add_extent_unwritten_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. */ - if (*idx > 0) { + if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); - if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -2218,7 +2109,7 @@ xfs_bmap_add_extent_unwritten_real( if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && - LEFT.br_state == newext && + LEFT.br_state == new->br_state && LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) state |= BMAP_LEFT_CONTIG; @@ -2227,9 +2118,8 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < xfs_iext_count(ifp) - 1) { + if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -2237,7 +2127,7 @@ xfs_bmap_add_extent_unwritten_real( if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && new_endoff == RIGHT.br_startoff && new->br_startblock + new->br_blockcount == RIGHT.br_startblock && - newext == RIGHT.br_state && + new->br_state == RIGHT.br_state && new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) != @@ -2258,24 +2148,20 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ - --*idx; - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; - xfs_iext_remove(ip, *idx + 1, 2, state); + xfs_iext_remove(ip, icur, state); + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &LEFT); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) + error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) @@ -2290,10 +2176,8 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount + - RIGHT.br_blockcount, LEFT.br_state))) + error = xfs_bmbt_update(cur, &LEFT); + if (error) goto done; } break; @@ -2303,23 +2187,19 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - --*idx; + LEFT.br_blockcount += PREV.br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_remove(ip, *idx + 1, 1, state); + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &LEFT); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, &PREV, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) @@ -2328,10 +2208,8 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + PREV.br_blockcount, - LEFT.br_state))) + error = xfs_bmbt_update(cur, &LEFT); + if (error) goto done; } break; @@ -2341,21 +2219,22 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount + RIGHT.br_blockcount); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, *idx + 1, 1, state); + PREV.br_blockcount += RIGHT.br_blockcount; + PREV.br_state = new->br_state; + + xfs_iext_next(ifp, icur); + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &PREV); + XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, - RIGHT.br_startblock, - RIGHT.br_blockcount, &i))) + error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); if ((error = xfs_btree_delete(cur, &i))) @@ -2364,10 +2243,8 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; } break; @@ -2378,22 +2255,19 @@ xfs_bmap_add_extent_unwritten_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + PREV.br_state = new->br_state; + xfs_iext_update_extent(ip, state, icur, &PREV); if (cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, new, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - newext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; } break; @@ -2403,43 +2277,32 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. */ - trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), - LEFT.br_blockcount + new->br_blockcount); - xfs_bmbt_set_startoff(ep, - PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - --*idx; + LEFT.br_blockcount += new->br_blockcount; + + old = PREV; + PREV.br_startoff += new->br_blockcount; + PREV.br_startblock += new->br_blockcount; + PREV.br_blockcount -= new->br_blockcount; + + xfs_iext_update_extent(ip, state, icur, &PREV); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &LEFT); if (cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, &old, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; - if ((error = xfs_btree_decrement(cur, 0, &i))) + error = xfs_btree_decrement(cur, 0, &i); + if (error) goto done; - error = xfs_bmbt_update(cur, LEFT.br_startoff, - LEFT.br_startblock, - LEFT.br_blockcount + new->br_blockcount, - LEFT.br_state); + error = xfs_bmbt_update(cur, &LEFT); if (error) goto done; } @@ -2450,32 +2313,25 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); - xfs_bmbt_set_startoff(ep, new_endoff); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - xfs_bmbt_set_startblock(ep, - new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - - xfs_iext_insert(ip, *idx, 1, new, state); + old = PREV; + PREV.br_startoff += new->br_blockcount; + PREV.br_startblock += new->br_blockcount; + PREV.br_blockcount -= new->br_blockcount; + + xfs_iext_update_extent(ip, state, icur, &PREV); + xfs_iext_insert(ip, icur, new, state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, &old, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, - PREV.br_startoff + new->br_blockcount, - PREV.br_startblock + new->br_blockcount, - PREV.br_blockcount - new->br_blockcount, - oldext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; cur->bc_rec.b = *new; if ((error = xfs_btree_insert(cur, &i))) @@ -2489,39 +2345,33 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + old = PREV; + PREV.br_blockcount -= new->br_blockcount; - ++*idx; + RIGHT.br_startoff = new->br_startoff; + RIGHT.br_startblock = new->br_startblock; + RIGHT.br_blockcount += new->br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_update_extent(ip, state, icur, &PREV); + xfs_iext_next(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &RIGHT); if (cur == NULL) rval = XFS_ILOG_DEXT; else { rval = 0; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount, &i))) + error = xfs_bmbt_lookup_eq(cur, &old, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) + error = xfs_btree_increment(cur, 0, &i); + if (error) goto done; - if ((error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + RIGHT.br_blockcount, - newext))) + error = xfs_bmbt_update(cur, &RIGHT); + if (error) goto done; } break; @@ -2531,13 +2381,12 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + old = PREV; + PREV.br_blockcount -= new->br_blockcount; - ++*idx; - xfs_iext_insert(ip, *idx, 1, new, state); + xfs_iext_update_extent(ip, state, icur, &PREV); + xfs_iext_next(ifp, icur); + xfs_iext_insert(ip, icur, new, state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); @@ -2545,22 +2394,17 @@ xfs_bmap_add_extent_unwritten_real( rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, &old, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - if ((error = xfs_bmbt_update(cur, PREV.br_startoff, - PREV.br_startblock, - PREV.br_blockcount - new->br_blockcount, - oldext))) + error = xfs_bmbt_update(cur, &PREV); + if (error) goto done; - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, new, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -2573,20 +2417,20 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, - new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + old = PREV; + PREV.br_blockcount = new->br_startoff - PREV.br_startoff; r[0] = *new; r[1].br_startoff = new_endoff; r[1].br_blockcount = - PREV.br_startoff + PREV.br_blockcount - new_endoff; + old.br_startoff + old.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; - r[1].br_state = oldext; + r[1].br_state = PREV.br_state; - ++*idx; - xfs_iext_insert(ip, *idx, 2, &r[0], state); + xfs_iext_update_extent(ip, state, icur, &PREV); + xfs_iext_next(ifp, icur); + xfs_iext_insert(ip, icur, &r[1], state); + xfs_iext_insert(ip, icur, &r[0], state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 2); @@ -2594,20 +2438,16 @@ xfs_bmap_add_extent_unwritten_real( rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { rval = XFS_ILOG_CORE; - if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, - PREV.br_startblock, PREV.br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, &old, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); /* new right extent - oldext */ - if ((error = xfs_bmbt_update(cur, r[1].br_startoff, - r[1].br_startblock, r[1].br_blockcount, - r[1].br_state))) + error = xfs_bmbt_update(cur, &r[1]); + if (error) goto done; /* new left extent - oldext */ cur->bc_rec.b = PREV; - cur->bc_rec.b.br_blockcount = - new->br_startoff - PREV.br_startoff; if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -2616,13 +2456,11 @@ xfs_bmap_add_extent_unwritten_real( * we are about to insert as we can't trust it after * the previous insert. */ - if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, - new->br_startblock, new->br_blockcount, - &i))) + error = xfs_bmbt_lookup_eq(cur, new, &i); + if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); /* new middle extent - newext */ - cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -2681,7 +2519,7 @@ STATIC void xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ int whichfork, - xfs_extnum_t *idx, /* extent number to update/insert */ + struct xfs_iext_cursor *icur, xfs_bmbt_irec_t *new) /* new data to add to file extents */ { xfs_ifork_t *ifp; /* inode fork pointer */ @@ -2689,22 +2527,17 @@ xfs_bmap_add_extent_hole_delay( xfs_filblks_t newlen=0; /* new indirect size */ xfs_filblks_t oldlen=0; /* old indirect size */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ - int state; /* state bits, accessed thru macros */ - xfs_filblks_t temp=0; /* temp for indirect calculations */ + int state = xfs_bmap_fork_to_state(whichfork); + xfs_filblks_t temp; /* temp for indirect calculations */ ifp = XFS_IFORK_PTR(ip, whichfork); - state = 0; - if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; ASSERT(isnullstartblock(new->br_startblock)); /* * Check and set flags if this segment has a left neighbor */ - if (*idx > 0) { + if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); - if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -2713,10 +2546,8 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (*idx < xfs_iext_count(ifp)) { + if (xfs_iext_get_extent(ifp, icur, &right)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); - if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -2748,22 +2579,20 @@ xfs_bmap_add_extent_hole_delay( * on the left and on the right. * Merge all three into a single extent record. */ - --*idx; temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), oldlen); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + left.br_startblock = nullstartblock(newlen); + left.br_blockcount = temp; - xfs_iext_remove(ip, *idx + 1, 1, state); + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &left); break; case BMAP_LEFT_CONTIG: @@ -2772,18 +2601,17 @@ xfs_bmap_add_extent_hole_delay( * on the left. * Merge the new allocation with the left neighbor. */ - --*idx; temp = left.br_blockcount + new->br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), oldlen); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), - nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + left.br_blockcount = temp; + left.br_startblock = nullstartblock(newlen); + + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &left); break; case BMAP_RIGHT_CONTIG: @@ -2792,16 +2620,15 @@ xfs_bmap_add_extent_hole_delay( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), oldlen); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, - nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + right.br_startoff = new->br_startoff; + right.br_startblock = nullstartblock(newlen); + right.br_blockcount = temp; + xfs_iext_update_extent(ip, state, icur, &right); break; case 0: @@ -2811,7 +2638,7 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - xfs_iext_insert(ip, *idx, 1, new, state); + xfs_iext_insert(ip, icur, new, state); break; } if (oldlen != newlen) { @@ -2832,7 +2659,7 @@ xfs_bmap_add_extent_hole_real( struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, - xfs_extnum_t *idx, + struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, xfs_fsblock_t *first, @@ -2847,27 +2674,19 @@ xfs_bmap_add_extent_hole_real( xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ int rval=0; /* return value (logging flags) */ - int state; /* state bits, accessed thru macros */ + int state = xfs_bmap_fork_to_state(whichfork); + struct xfs_bmbt_irec old; - ASSERT(*idx >= 0); - ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); - state = 0; - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - /* * Check and set flags if this segment has a left neighbor. */ - if (*idx > 0) { + if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -2876,9 +2695,8 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (*idx < xfs_iext_count(ifp)) { + if (xfs_iext_get_extent(ifp, icur, &right)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -2915,14 +2733,11 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - --*idx; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - left.br_blockcount + new->br_blockcount + - right.br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + left.br_blockcount += new->br_blockcount + right.br_blockcount; - xfs_iext_remove(ip, *idx + 1, 1, state); + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &left); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); @@ -2930,9 +2745,7 @@ xfs_bmap_add_extent_hole_real( rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(cur, right.br_startoff, - right.br_startblock, right.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(cur, &right, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -2944,12 +2757,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount + - right.br_blockcount, - left.br_state); + error = xfs_bmbt_update(cur, &left); if (error) goto done; } @@ -2961,27 +2769,21 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - --*idx; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), - left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + old = left; + left.br_blockcount += new->br_blockcount; + + xfs_iext_prev(ifp, icur); + xfs_iext_update_extent(ip, state, icur, &left); if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(cur, left.br_startoff, - left.br_startblock, left.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(cur, left.br_startoff, - left.br_startblock, - left.br_blockcount + - new->br_blockcount, - left.br_state); + error = xfs_bmbt_update(cur, &left); if (error) goto done; } @@ -2993,29 +2795,22 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), - new->br_startoff, new->br_startblock, - new->br_blockcount + right.br_blockcount, - right.br_state); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + old = right; + + right.br_startoff = new->br_startoff; + right.br_startblock = new->br_startblock; + right.br_blockcount += new->br_blockcount; + xfs_iext_update_extent(ip, state, icur, &right); if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(cur, - right.br_startoff, - right.br_startblock, - right.br_blockcount, &i); + error = xfs_bmbt_lookup_eq(cur, &old, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(cur, new->br_startoff, - new->br_startblock, - new->br_blockcount + - right.br_blockcount, - right.br_state); + error = xfs_bmbt_update(cur, &right); if (error) goto done; } @@ -3027,21 +2822,17 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - xfs_iext_insert(ip, *idx, 1, new, state); + xfs_iext_insert(ip, icur, new, state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(cur, - new->br_startoff, - new->br_startblock, - new->br_blockcount, &i); + error = xfs_bmbt_lookup_eq(cur, new, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - cur->bc_rec.b.br_state = new->br_state; error = xfs_btree_insert(cur, &i); if (error) goto done; @@ -3551,6 +3342,49 @@ xfs_bmap_btalloc_filestreams( return 0; } +/* Update all inode and quota accounting for the allocation we just did. */ +static void +xfs_bmap_btalloc_accounting( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) +{ + if (ap->flags & XFS_BMAPI_COWFORK) { + /* + * COW fork blocks are in-core only and thus are treated as + * in-core quota reservation (like delalloc blocks) even when + * converted to real blocks. The quota reservation is not + * accounted to disk until blocks are remapped to the data + * fork. So if these blocks were previously delalloc, we + * already have quota reservation and there's nothing to do + * yet. + */ + if (ap->wasdel) + return; + + /* + * Otherwise, we've allocated blocks in a hole. The transaction + * has acquired in-core quota reservation for this extent. + * Rather than account these as real blocks, however, we reduce + * the transaction quota reservation based on the allocation. + * This essentially transfers the transaction quota reservation + * to that of a delalloc extent. + */ + ap->ip->i_delayed_blks += args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, + -(long)args->len); + return; + } + + /* data/attr fork only */ + ap->ip->i_d.di_nblocks += args->len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, + args->len); +} + STATIC int xfs_bmap_btalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -3561,6 +3395,8 @@ xfs_bmap_btalloc( xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; + xfs_fileoff_t orig_offset; + xfs_extlen_t orig_length; xfs_extlen_t blen; xfs_extlen_t nextminlen = 0; int nullfb; /* true if ap->firstblock isn't set */ @@ -3570,6 +3406,8 @@ xfs_bmap_btalloc( int stripe_align; ASSERT(ap->length); + orig_offset = ap->offset; + orig_length = ap->length; mp = ap->ip->i_mount; @@ -3785,19 +3623,23 @@ xfs_bmap_btalloc( *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; - if (!(ap->flags & XFS_BMAPI_COWFORK)) - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; /* - * Adjust the disk quota also. This was reserved - * earlier. + * If the extent size hint is active, we tried to round the + * caller's allocation request offset down to extsz and the + * length up to another extsz boundary. If we found a free + * extent we mapped it in starting at this new offset. If the + * newly mapped space isn't long enough to cover any of the + * range of offsets that was originally requested, move the + * mapping up so that we can fill as much of the caller's + * original request as possible. Free space is apparently + * very fragmented so we're unlikely to be able to satisfy the + * hints anyway. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); + if (ap->length <= orig_length) + ap->offset = orig_offset; + else if (ap->offset + ap->length < orig_offset + orig_length) + ap->offset = orig_offset + orig_length - ap->length; + xfs_bmap_btalloc_accounting(ap, &args); } else { ap->blkno = NULLFSBLOCK; ap->length = 0; @@ -3852,6 +3694,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ @@ -3970,7 +3823,7 @@ xfs_bmapi_read( struct xfs_bmbt_irec got; xfs_fileoff_t obno; xfs_fileoff_t end; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; int error; bool eof = false; int n = 0; @@ -4012,7 +3865,7 @@ xfs_bmapi_read( return error; } - if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) + if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) eof = true; end = bno + len; obno = bno; @@ -4044,7 +3897,7 @@ xfs_bmapi_read( break; /* Else go on to the next record. */ - if (!xfs_iext_get_extent(ifp, ++idx, &got)) + if (!xfs_iext_next_extent(ifp, &icur, &got)) eof = true; } *nmap = n; @@ -4072,15 +3925,13 @@ xfs_bmapi_reserve_delalloc( xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, - xfs_extnum_t *lastx, + struct xfs_iext_cursor *icur, int eof) { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_extlen_t alen; xfs_extlen_t indlen; - char rt = XFS_IS_REALTIME_INODE(ip); - xfs_extlen_t extsz; int error; xfs_fileoff_t aoff = off; @@ -4095,31 +3946,25 @@ xfs_bmapi_reserve_delalloc( prealloc = alen - len; /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) - extsz = xfs_get_cowextsz_hint(ip); - else - extsz = xfs_get_extsz_hint(ip); - if (extsz) { + if (whichfork == XFS_COW_FORK) { struct xfs_bmbt_irec prev; + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); - if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) + if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) prev.br_startoff = NULLFILEOFF; - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 1, 0, &aoff, &alen); ASSERT(!error); } - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; - /* * Make a transaction-less quota reservation for delayed allocation * blocks. This number gets adjusted later. We return if we haven't * allocated blocks already inside this loop. */ error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, - rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + XFS_QMOPT_RES_REGBLKS); if (error) return error; @@ -4130,12 +3975,7 @@ xfs_bmapi_reserve_delalloc( indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); ASSERT(indlen > 0); - if (rt) { - error = xfs_mod_frextents(mp, -((int64_t)extsz)); - } else { - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); - } - + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); if (error) goto out_unreserve_quota; @@ -4151,7 +3991,7 @@ xfs_bmapi_reserve_delalloc( got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got); /* * Tag the inode if blocks were preallocated. Note that COW fork @@ -4166,14 +4006,11 @@ xfs_bmapi_reserve_delalloc( return 0; out_unreserve_blocks: - if (rt) - xfs_mod_frextents(mp, extsz); - else - xfs_mod_fdblocks(mp, alen, false); + xfs_mod_fdblocks(mp, alen, false); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, + XFS_QMOPT_RES_REGBLKS); return error; } @@ -4196,10 +4033,7 @@ xfs_bmapi_allocate( if (bma->wasdel) { bma->length = (xfs_extlen_t)bma->got.br_blockcount; bma->offset = bma->got.br_startoff; - if (bma->idx) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), - &bma->prev); - } + xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev); } else { bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); if (!bma->eof) @@ -4284,7 +4118,7 @@ xfs_bmapi_allocate( error = xfs_bmap_add_extent_delay_real(bma, whichfork); else error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, - whichfork, &bma->idx, &bma->cur, &bma->got, + whichfork, &bma->icur, &bma->cur, &bma->got, bma->firstblock, bma->dfops, &bma->logflags); bma->logflags |= tmp_logflags; @@ -4296,7 +4130,7 @@ xfs_bmapi_allocate( * or xfs_bmap_add_extent_hole_real might have merged it into one of * the neighbouring ones. */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + xfs_iext_get_extent(ifp, &bma->icur, &bma->got); ASSERT(bma->got.br_startoff <= bma->offset); ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= @@ -4354,8 +4188,8 @@ xfs_bmapi_convert_unwritten( } error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, - &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, - &tmp_logflags); + &bma->icur, &bma->cur, mval, bma->firstblock, + bma->dfops, &tmp_logflags); /* * Log the inode core unconditionally in the unwritten extent conversion * path because the conversion might not have done so (e.g., if the @@ -4377,7 +4211,7 @@ xfs_bmapi_convert_unwritten( * xfs_bmap_add_extent_unwritten_real might have merged it into one * of the neighbouring ones. */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); + xfs_iext_get_extent(ifp, &bma->icur, &bma->got); /* * We may have combined previously unwritten space with written space, @@ -4496,9 +4330,9 @@ xfs_bmapi_write( end = bno + len; obno = bno; - if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got)) + if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got)) eof = true; - if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev)) + if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) bma.prev.br_startoff = NULLFILEOFF; bma.tp = tp; bma.ip = ip; @@ -4510,8 +4344,16 @@ xfs_bmapi_write( while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; - /* in hole or beyoned EOF? */ + /* in hole or beyond EOF? */ if (eof || bma.got.br_startoff > bno) { + /* + * CoW fork conversions should /never/ hit EOF or + * holes. There should always be something for us + * to work on. + */ + ASSERT(!((flags & XFS_BMAPI_CONVERT) && + (flags & XFS_BMAPI_COWFORK))); + if (flags & XFS_BMAPI_DELALLOC) { /* * For the COW fork we can reasonably get a @@ -4540,7 +4382,8 @@ xfs_bmapi_write( * First, deal with the hole before the allocated space * that we found, if any. */ - if (need_alloc || wasdelay) { + if ((need_alloc || wasdelay) && + !(flags & XFS_BMAPI_CONVERT_ONLY)) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; @@ -4603,7 +4446,7 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; - if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got)) + if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) eof = true; } *nmap = n; @@ -4676,7 +4519,7 @@ xfs_bmapi_remap( struct xfs_btree_cur *cur = NULL; xfs_fsblock_t firstblock = NULLFSBLOCK; struct xfs_bmbt_irec got; - xfs_extnum_t idx; + struct xfs_iext_cursor icur; int logflags = 0, error; ASSERT(len > 0); @@ -4700,7 +4543,7 @@ xfs_bmapi_remap( return error; } - if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) { + if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) { /* make sure we only reflink into a hole. */ ASSERT(got.br_startoff > bno); ASSERT(got.br_startoff - bno >= len); @@ -4721,8 +4564,8 @@ xfs_bmapi_remap( got.br_blockcount = len; got.br_state = XFS_EXT_NORM; - error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur, - &got, &firstblock, dfops, &logflags); + error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur, + &cur, &got, &firstblock, dfops, &logflags); if (error) goto error0; @@ -4838,7 +4681,7 @@ int xfs_bmap_del_extent_delay( struct xfs_inode *ip, int whichfork, - xfs_extnum_t *idx, + struct xfs_iext_cursor *icur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del) { @@ -4848,7 +4691,8 @@ xfs_bmap_del_extent_delay( int64_t da_old, da_new, da_diff = 0; xfs_fileoff_t del_endoff, got_endoff; xfs_filblks_t got_indlen, new_indlen, stolen; - int error = 0, state = 0; + int state = xfs_bmap_fork_to_state(whichfork); + int error = 0; bool isrt; XFS_STATS_INC(mp, xs_del_exlist); @@ -4859,8 +4703,6 @@ xfs_bmap_del_extent_delay( da_old = startblockval(got->br_startblock); da_new = 0; - ASSERT(*idx >= 0); - ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -4884,46 +4726,39 @@ xfs_bmap_del_extent_delay( return error; ip->i_delayed_blks -= del->br_blockcount; - if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - if (got->br_startoff == del->br_startoff) - state |= BMAP_LEFT_CONTIG; + state |= BMAP_LEFT_FILLING; if (got_endoff == del_endoff) - state |= BMAP_RIGHT_CONTIG; + state |= BMAP_RIGHT_FILLING; - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: /* * Matches the whole extent. Delete the entry. */ - xfs_iext_remove(ip, *idx, 1, state); - --*idx; + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); break; - case BMAP_LEFT_CONTIG: + case BMAP_LEFT_FILLING: /* * Deleting the first part of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); got->br_startoff = del_endoff; got->br_blockcount -= del->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, got->br_blockcount), da_old); got->br_startblock = nullstartblock((int)da_new); - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_update_extent(ip, state, icur, got); break; - case BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING: /* * Deleting the last part of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); got->br_blockcount = got->br_blockcount - del->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, got->br_blockcount), da_old); got->br_startblock = nullstartblock((int)da_new); - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_update_extent(ip, state, icur, got); break; case 0: /* @@ -4935,8 +4770,6 @@ xfs_bmap_del_extent_delay( * Warn if either of the new indlen reservations is zero as this * can lead to delalloc problems. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - got->br_blockcount = del->br_startoff - got->br_startoff; got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); @@ -4948,15 +4781,14 @@ xfs_bmap_del_extent_delay( del->br_blockcount); got->br_startblock = nullstartblock((int)got_indlen); - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_); new.br_startoff = del_endoff; new.br_state = got->br_state; new.br_startblock = nullstartblock((int)new_indlen); - ++*idx; - xfs_iext_insert(ip, *idx, 1, &new, state); + xfs_iext_update_extent(ip, state, icur, got); + xfs_iext_next(ifp, icur); + xfs_iext_insert(ip, icur, &new, state); da_new = got_indlen + new_indlen - stolen; del->br_blockcount -= stolen; @@ -4975,7 +4807,7 @@ xfs_bmap_del_extent_delay( void xfs_bmap_del_extent_cow( struct xfs_inode *ip, - xfs_extnum_t *idx, + struct xfs_iext_cursor *icur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del) { @@ -4990,75 +4822,68 @@ xfs_bmap_del_extent_cow( del_endoff = del->br_startoff + del->br_blockcount; got_endoff = got->br_startoff + got->br_blockcount; - ASSERT(*idx >= 0); - ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); ASSERT(!isnullstartblock(got->br_startblock)); if (got->br_startoff == del->br_startoff) - state |= BMAP_LEFT_CONTIG; + state |= BMAP_LEFT_FILLING; if (got_endoff == del_endoff) - state |= BMAP_RIGHT_CONTIG; + state |= BMAP_RIGHT_FILLING; - switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { - case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: /* * Matches the whole extent. Delete the entry. */ - xfs_iext_remove(ip, *idx, 1, state); - --*idx; + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); break; - case BMAP_LEFT_CONTIG: + case BMAP_LEFT_FILLING: /* * Deleting the first part of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); got->br_startoff = del_endoff; got->br_blockcount -= del->br_blockcount; got->br_startblock = del->br_startblock + del->br_blockcount; - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_update_extent(ip, state, icur, got); break; - case BMAP_RIGHT_CONTIG: + case BMAP_RIGHT_FILLING: /* * Deleting the last part of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); got->br_blockcount -= del->br_blockcount; - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_update_extent(ip, state, icur, got); break; case 0: /* * Deleting the middle of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); got->br_blockcount = del->br_startoff - got->br_startoff; - xfs_iext_update_extent(ifp, *idx, got); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); new.br_startoff = del_endoff; new.br_blockcount = got_endoff - del_endoff; new.br_state = got->br_state; new.br_startblock = del->br_startblock + del->br_blockcount; - ++*idx; - xfs_iext_insert(ip, *idx, 1, &new, state); + xfs_iext_update_extent(ip, state, icur, got); + xfs_iext_next(ifp, icur); + xfs_iext_insert(ip, icur, &new, state); break; } + ip->i_delayed_blks -= del->br_blockcount; } /* * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). + * after removing space. */ STATIC int /* error */ -xfs_bmap_del_extent( +xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t *idx, /* extent number to update/delete */ + struct xfs_iext_cursor *icur, struct xfs_defer_ops *dfops, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ @@ -5066,16 +4891,12 @@ xfs_bmap_del_extent( int whichfork, /* data or attr fork */ int bflags) /* bmapi flags */ { - xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ - xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ xfs_fsblock_t del_endblock=0; /* first block past del */ xfs_fileoff_t del_endoff; /* first offset past del */ - int delay; /* current block is delayed allocated */ int do_fx; /* free extent at end of routine */ - xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ int error; /* error return value */ - int flags; /* inode logging flags */ - xfs_bmbt_irec_t got; /* current extent entry */ + int flags = 0;/* inode logging flags */ + struct xfs_bmbt_irec got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -5084,103 +4905,81 @@ xfs_bmap_del_extent( xfs_bmbt_irec_t new; /* new record to be inserted */ /* REFERENCED */ uint qfield; /* quota field to update */ - xfs_filblks_t temp; /* for indirect length calculations */ - xfs_filblks_t temp2; /* for indirect length calculations */ - int state = 0; + int state = xfs_bmap_fork_to_state(whichfork); + struct xfs_bmbt_irec old; mp = ip->i_mount; XFS_STATS_INC(mp, xs_del_exlist); - if (whichfork == XFS_ATTR_FORK) - state |= BMAP_ATTRFORK; - else if (whichfork == XFS_COW_FORK) - state |= BMAP_COWFORK; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, *idx); - xfs_bmbt_get_all(ep, &got); + xfs_iext_get_extent(ifp, icur, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; got_endoff = got.br_startoff + got.br_blockcount; ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - flags = 0; + ASSERT(!isnullstartblock(got.br_startblock)); qfield = 0; error = 0; + /* - * If deleting a real allocation, must free up the disk space. + * If it's the case where the directory code is running with no block + * reservation, and the deleted block is in the middle of its extent, + * and the resulting insert of an extent would cause transformation to + * btree format, then reject it. The calling code will then swap blocks + * around instead. We have to do this now, rather than waiting for the + * conversion to btree format, since the transaction will be dirty then. */ - if (!delay) { - flags = XFS_ILOG_CORE; - /* - * Realtime allocation. Free it and record di_nblocks update. - */ - if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; - xfs_filblks_t len; - - ASSERT(do_mod(del->br_blockcount, - mp->m_sb.sb_rextsize) == 0); - ASSERT(do_mod(del->br_startblock, - mp->m_sb.sb_rextsize) == 0); - bno = del->br_startblock; - len = del->br_blockcount; - do_div(bno, mp->m_sb.sb_rextsize); - do_div(len, mp->m_sb.sb_rextsize); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; - do_fx = 0; - nblks = len * mp->m_sb.sb_rextsize; - qfield = XFS_TRANS_DQ_RTBCOUNT; - } - /* - * Ordinary allocation. - */ - else { - do_fx = 1; - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - } - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - if (cur) { - if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, got.br_blockcount, - &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - } - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; + if (tp->t_blk_res == 0 && + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) >= + XFS_IFORK_MAXEXT(ip, whichfork) && + del->br_startoff > got.br_startoff && del_endoff < got_endoff) + return -ENOSPC; + + flags = XFS_ILOG_CORE; + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { + xfs_fsblock_t bno; + xfs_filblks_t len; + + ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0); + ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0); + bno = del->br_startblock; + len = del->br_blockcount; + do_div(bno, mp->m_sb.sb_rextsize); + do_div(len, mp->m_sb.sb_rextsize); + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; do_fx = 0; + nblks = len * mp->m_sb.sb_rextsize; + qfield = XFS_TRANS_DQ_RTBCOUNT; + } else { + do_fx = 1; + nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; } - /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. - */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { + error = xfs_bmbt_lookup_eq(cur, &got, &i); + if (error) + goto done; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + } + + if (got.br_startoff == del->br_startoff) + state |= BMAP_LEFT_FILLING; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_FILLING; + + switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { + case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: /* * Matches the whole extent. Delete the entry. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, *idx, 1, - whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - --*idx; - if (delay) - break; - + xfs_iext_remove(ip, icur, state); + xfs_iext_prev(ifp, icur); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); flags |= XFS_ILOG_CORE; @@ -5192,168 +4991,106 @@ xfs_bmap_del_extent( goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); break; - - case 2: + case BMAP_LEFT_FILLING: /* * Deleting the first part of the extent. */ - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + got.br_startoff = del_endoff; + got.br_startblock = del_endblock; + got.br_blockcount -= del->br_blockcount; + xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; } - if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) + error = xfs_bmbt_update(cur, &got); + if (error) goto done; break; - - case 1: + case BMAP_RIGHT_FILLING: /* * Deleting the last part of the extent. */ - temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - da_new = temp; - break; - } - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + got.br_blockcount -= del->br_blockcount; + xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; } - if ((error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount - del->br_blockcount, - got.br_state))) + error = xfs_bmbt_update(cur, &got); + if (error) goto done; break; - case 0: /* * Deleting the middle of the extent. */ - temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); + old = got; + + got.br_blockcount = del->br_startoff - got.br_startoff; + xfs_iext_update_extent(ip, state, icur, &got); + new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; + new.br_blockcount = got_endoff - del_endoff; new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - flags |= XFS_ILOG_CORE; - if (cur) { - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, temp, - got.br_state))) - goto done; - if ((error = xfs_btree_increment(cur, 0, &i))) - goto done; - cur->bc_rec.b = new; - error = xfs_btree_insert(cur, &i); - if (error && error != -ENOSPC) - goto done; + new.br_startblock = del_endblock; + + flags |= XFS_ILOG_CORE; + if (cur) { + error = xfs_bmbt_update(cur, &got); + if (error) + goto done; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto done; + cur->bc_rec.b = new; + error = xfs_btree_insert(cur, &i); + if (error && error != -ENOSPC) + goto done; + /* + * If get no-space back from btree insert, it tried a + * split, and we have a zero block reservation. Fix up + * our state and return the error. + */ + if (error == -ENOSPC) { /* - * If get no-space back from btree insert, - * it tried a split, and we have a zero - * block reservation. - * Fix up our state and return the error. + * Reset the cursor, don't trust it after any + * insert operation. */ - if (error == -ENOSPC) { - /* - * Reset the cursor, don't trust - * it after any insert operation. - */ - if ((error = xfs_bmbt_lookup_eq(cur, - got.br_startoff, - got.br_startblock, - temp, &i))) - goto done; - XFS_WANT_CORRUPTED_GOTO(mp, - i == 1, done); - /* - * Update the btree record back - * to the original value. - */ - if ((error = xfs_bmbt_update(cur, - got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state))) - goto done; - /* - * Reset the extent record back - * to the original value. - */ - xfs_bmbt_set_blockcount(ep, - got.br_blockcount); - flags = 0; - error = -ENOSPC; + error = xfs_bmbt_lookup_eq(cur, &got, &i); + if (error) goto done; - } XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - } else - flags |= xfs_ilog_fext(whichfork); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); - } else { - xfs_filblks_t stolen; - ASSERT(whichfork == XFS_DATA_FORK); - - /* - * Distribute the original indlen reservation across the - * two new extents. Steal blocks from the deleted extent - * if necessary. Stealing blocks simply fudges the - * fdblocks accounting in xfs_bunmapi(). - */ - temp = xfs_bmap_worst_indlen(ip, got.br_blockcount); - temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount); - stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2, - del->br_blockcount); - da_new = temp + temp2 - stolen; - del->br_blockcount -= stolen; - - /* - * Set the reservation for each extent. Warn if either - * is zero as this can lead to delalloc problems. - */ - WARN_ON_ONCE(!temp || !temp2); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - new.br_startblock = nullstartblock((int)temp2); - } - trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, *idx + 1, 1, &new, state); - ++*idx; + /* + * Update the btree record back + * to the original value. + */ + error = xfs_bmbt_update(cur, &old); + if (error) + goto done; + /* + * Reset the extent record back + * to the original value. + */ + xfs_iext_update_extent(ip, state, icur, &old); + flags = 0; + error = -ENOSPC; + goto done; + } + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); + } else + flags |= xfs_ilog_fext(whichfork); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + xfs_iext_next(ifp, icur); + xfs_iext_insert(ip, icur, &new, state); break; } /* remove reverse mapping */ - if (!delay) { - error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); - if (error) - goto done; - } + error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); + if (error) + goto done; /* * If we need to, add to list of extents to delete. @@ -5379,13 +5116,6 @@ xfs_bmap_del_extent( if (qfield && !(bflags & XFS_BMAPI_REMAP)) xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. - */ - ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); done: *logflagsp = flags; return error; @@ -5401,7 +5131,7 @@ int /* error */ __xfs_bunmapi( xfs_trans_t *tp, /* transaction pointer */ struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting offset to unmap */ + xfs_fileoff_t start, /* first file offset deleted */ xfs_filblks_t *rlen, /* i/o: amount remaining */ int flags, /* misc flags */ xfs_extnum_t nexts, /* number of extents max */ @@ -5416,11 +5146,9 @@ __xfs_bunmapi( xfs_bmbt_irec_t got; /* current extent record */ xfs_ifork_t *ifp; /* inode fork pointer */ int isrt; /* freeing in rt area */ - xfs_extnum_t lastx; /* last extent index used */ int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ @@ -5428,8 +5156,11 @@ __xfs_bunmapi( xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; + xfs_fileoff_t end; + struct xfs_iext_cursor icur; + bool done = false; - trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); + trace_xfs_bunmap(ip, start, len, flags, _RET_IP_); whichfork = xfs_bmapi_whichfork(flags); ASSERT(whichfork != XFS_COW_FORK); @@ -5454,7 +5185,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; @@ -5468,18 +5199,13 @@ __xfs_bunmapi( } XFS_STATS_INC(mp, xs_blk_unmap); isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); - start = bno; - bno = start + len - 1; + end = start + len; - /* - * Check to see if the given block number is past the end of the - * file, back up to the last block if so... - */ - if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) { - ASSERT(lastx > 0); - xfs_iext_get_extent(ifp, --lastx, &got); - bno = got.br_startoff + got.br_blockcount - 1; + if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { + *rlen = 0; + return 0; } + end--; logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { @@ -5502,24 +5228,24 @@ __xfs_bunmapi( } extno = 0; - while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && + while (end != (xfs_fileoff_t)-1 && end >= start && (nexts == 0 || extno < nexts) && max_len > 0) { /* - * Is the found extent after a hole in which bno lives? + * Is the found extent after a hole in which end lives? * Just back up to the previous extent, if so. */ - if (got.br_startoff > bno) { - if (--lastx < 0) - break; - xfs_iext_get_extent(ifp, lastx, &got); + if (got.br_startoff > end && + !xfs_iext_prev_extent(ifp, &icur, &got)) { + done = true; + break; } /* * Is the last block of this extent before the range * we're supposed to delete? If so, we're done. */ - bno = XFS_FILEOFF_MIN(bno, + end = XFS_FILEOFF_MIN(end, got.br_startoff + got.br_blockcount - 1); - if (bno < start) + if (end < start) break; /* * Then deal with the (possibly delayed) allocated space @@ -5544,8 +5270,8 @@ __xfs_bunmapi( if (!wasdel) del.br_startblock += start - got.br_startoff; } - if (del.br_startoff + del.br_blockcount > bno + 1) - del.br_blockcount = bno + 1 - del.br_startoff; + if (del.br_startoff + del.br_blockcount > end + 1) + del.br_blockcount = end + 1 - del.br_startoff; /* How much can we safely unmap? */ if (max_len < del.br_blockcount) { @@ -5571,13 +5297,13 @@ __xfs_bunmapi( * This piece is unwritten, or we're not * using unwritten extents. Skip over it. */ - ASSERT(bno >= mod); - bno -= mod > del.br_blockcount ? + ASSERT(end >= mod); + end -= mod > del.br_blockcount ? del.br_blockcount : mod; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(xfs_iext_get_ext( - ifp, lastx), &got); + if (end < got.br_startoff && + !xfs_iext_prev_extent(ifp, &icur, &got)) { + done = true; + break; } continue; } @@ -5598,7 +5324,7 @@ __xfs_bunmapi( } del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, - whichfork, &lastx, &cur, &del, + whichfork, &icur, &cur, &del, firstblock, dfops, &logflags); if (error) goto error0; @@ -5623,10 +5349,13 @@ __xfs_bunmapi( * Can't make it unwritten. There isn't * a full extent here so just skip it. */ - ASSERT(bno >= del.br_blockcount); - bno -= del.br_blockcount; - if (got.br_startoff > bno && --lastx >= 0) - xfs_iext_get_extent(ifp, lastx, &got); + ASSERT(end >= del.br_blockcount); + end -= del.br_blockcount; + if (got.br_startoff > end && + !xfs_iext_prev_extent(ifp, &icur, &got)) { + done = true; + break; + } continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { struct xfs_bmbt_irec prev; @@ -5637,8 +5366,8 @@ __xfs_bunmapi( * Unwrite the killed part of that one and * try again. */ - ASSERT(lastx > 0); - xfs_iext_get_extent(ifp, lastx - 1, &prev); + if (!xfs_iext_prev_extent(ifp, &icur, &prev)) + ASSERT(0); ASSERT(prev.br_state == XFS_EXT_NORM); ASSERT(!isnullstartblock(prev.br_startblock)); ASSERT(del.br_startblock == @@ -5650,9 +5379,8 @@ __xfs_bunmapi( prev.br_startoff = start; } prev.br_state = XFS_EXT_UNWRITTEN; - lastx--; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, whichfork, &lastx, &cur, + ip, whichfork, &icur, &cur, &prev, firstblock, dfops, &logflags); if (error) @@ -5662,7 +5390,7 @@ __xfs_bunmapi( ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, whichfork, &lastx, &cur, + ip, whichfork, &icur, &cur, &del, firstblock, dfops, &logflags); if (error) @@ -5671,85 +5399,39 @@ __xfs_bunmapi( } } - /* - * If it's the case where the directory code is running - * with no block reservation, and the deleted block is in - * the middle of its extent, and the resulting insert - * of an extent would cause transformation to btree format, - * then reject it. The calling code will then swap - * blocks around instead. - * We have to do this now, rather than waiting for the - * conversion to btree format, since the transaction - * will be dirty. - */ - if (!wasdel && tp->t_blk_res == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ - XFS_IFORK_MAXEXT(ip, whichfork) && - del.br_startoff > got.br_startoff && - del.br_startoff + del.br_blockcount < - got.br_startoff + got.br_blockcount) { - error = -ENOSPC; - goto error0; + if (wasdel) { + error = xfs_bmap_del_extent_delay(ip, whichfork, &icur, + &got, &del); + } else { + error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops, + cur, &del, &tmp_logflags, whichfork, + flags); + logflags |= tmp_logflags; } - /* - * Unreserve quota and update realtime free space, if - * appropriate. If delayed allocation, update the inode delalloc - * counter now and wait to update the sb counters as - * xfs_bmap_del_extent() might need to borrow some blocks. - */ - if (wasdel) { - ASSERT(startblockval(del.br_startblock) > 0); - if (isrt) { - xfs_filblks_t rtexts; - - rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); - do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_frextents(mp, (int64_t)rtexts); - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_RTBLKS); - } else { - (void)xfs_trans_reserve_quota_nblks(NULL, - ip, -((long)del.br_blockcount), 0, - XFS_QMOPT_RES_REGBLKS); - } - ip->i_delayed_blks -= del.br_blockcount; - if (cur) - cur->bc_private.b.flags |= - XFS_BTCUR_BPRV_WASDEL; - } else if (cur) - cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; - - error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del, - &tmp_logflags, whichfork, flags); - logflags |= tmp_logflags; if (error) goto error0; - if (!isrt && wasdel) - xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false); - max_len -= del.br_blockcount; - bno = del.br_startoff - 1; + end = del.br_startoff - 1; nodelete: /* * If not done go on to the next (previous) record. */ - if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= 0) { - xfs_iext_get_extent(ifp, lastx, &got); - if (got.br_startoff > bno && --lastx >= 0) - xfs_iext_get_extent(ifp, lastx, &got); + if (end != (xfs_fileoff_t)-1 && end >= start) { + if (!xfs_iext_get_extent(ifp, &icur, &got) || + (got.br_startoff > end && + !xfs_iext_prev_extent(ifp, &icur, &got))) { + done = true; + break; } extno++; } } - if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0) + if (done || end == (xfs_fileoff_t)-1 || end < start) *rlen = 0; else - *rlen = bno - start + 1; + *rlen = end - start + 1; /* * Convert to a btree if necessary. @@ -5867,14 +5549,13 @@ xfs_bmse_merge( struct xfs_inode *ip, int whichfork, xfs_fileoff_t shift, /* shift fsb */ - int current_ext, /* idx of gotp */ + struct xfs_iext_cursor *icur, struct xfs_bmbt_irec *got, /* extent to shift */ struct xfs_bmbt_irec *left, /* preceding extent */ struct xfs_btree_cur *cur, int *logflags, /* output */ struct xfs_defer_ops *dfops) { - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_bmbt_irec new; xfs_filblks_t blockcount; int error, i; @@ -5902,8 +5583,7 @@ xfs_bmse_merge( } /* lookup and remove the extent to merge */ - error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock, - got->br_blockcount, &i); + error = xfs_bmbt_lookup_eq(cur, got, &i); if (error) return error; XFS_WANT_CORRUPTED_RETURN(mp, i == 1); @@ -5914,20 +5594,20 @@ xfs_bmse_merge( XFS_WANT_CORRUPTED_RETURN(mp, i == 1); /* lookup and update size of the previous extent */ - error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock, - left->br_blockcount, &i); + error = xfs_bmbt_lookup_eq(cur, left, &i); if (error) return error; XFS_WANT_CORRUPTED_RETURN(mp, i == 1); - error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock, - new.br_blockcount, new.br_state); + error = xfs_bmbt_update(cur, &new); if (error) return error; done: - xfs_iext_update_extent(ifp, current_ext - 1, &new); - xfs_iext_remove(ip, current_ext, 1, 0); + xfs_iext_remove(ip, icur, 0); + xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur); + xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, + &new); /* update reverse mapping. rmap functions merge the rmaps for us */ error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); @@ -5938,183 +5618,83 @@ done: return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); } -/* - * Shift a single extent. - */ -STATIC int -xfs_bmse_shift_one( - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t offset_shift_fsb, - int *current_ext, - struct xfs_bmbt_irec *got, - struct xfs_btree_cur *cur, - int *logflags, - enum shift_direction direction, - struct xfs_defer_ops *dfops) +static int +xfs_bmap_shift_update_extent( + struct xfs_inode *ip, + int whichfork, + struct xfs_iext_cursor *icur, + struct xfs_bmbt_irec *got, + struct xfs_btree_cur *cur, + int *logflags, + struct xfs_defer_ops *dfops, + xfs_fileoff_t startoff) { - struct xfs_ifork *ifp; - struct xfs_mount *mp; - xfs_fileoff_t startoff; - struct xfs_bmbt_irec adj_irec, new; - int error; - int i; - int total_extents; - - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); - total_extents = xfs_iext_count(ifp); - - /* delalloc extents should be prevented by caller */ - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock)); - - if (direction == SHIFT_LEFT) { - startoff = got->br_startoff - offset_shift_fsb; - - /* - * Check for merge if we've got an extent to the left, - * otherwise make sure there's enough room at the start - * of the file for the shift. - */ - if (!*current_ext) { - if (got->br_startoff < offset_shift_fsb) - return -EINVAL; - goto update_current_ext; - } - - /* - * grab the left extent and check for a large enough hole. - */ - xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec); - if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount) - return -EINVAL; - - /* check whether to merge the extent or shift it down */ - if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) { - return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, - *current_ext, got, &adj_irec, - cur, logflags, dfops); - } - } else { - startoff = got->br_startoff + offset_shift_fsb; - /* nothing to move if this is the last extent */ - if (*current_ext >= (total_extents - 1)) - goto update_current_ext; - - /* - * If this is not the last extent in the file, make sure there - * is enough room between current extent and next extent for - * accommodating the shift. - */ - xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec); - if (startoff + got->br_blockcount > adj_irec.br_startoff) - return -EINVAL; - - /* - * Unlike a left shift (which involves a hole punch), - * a right shift does not modify extent neighbors - * in any way. We should never find mergeable extents - * in this scenario. Check anyways and warn if we - * encounter two extents that could be one. - */ - if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb)) - WARN_ON_ONCE(1); - } + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec prev = *got; + int error, i; - /* - * Increment the extent index for the next iteration, update the start - * offset of the in-core extent and update the btree if applicable. - */ -update_current_ext: *logflags |= XFS_ILOG_CORE; - new = *got; - new.br_startoff = startoff; + got->br_startoff = startoff; if (cur) { - error = xfs_bmbt_lookup_eq(cur, got->br_startoff, - got->br_startblock, got->br_blockcount, &i); + error = xfs_bmbt_lookup_eq(cur, &prev, &i); if (error) return error; XFS_WANT_CORRUPTED_RETURN(mp, i == 1); - error = xfs_bmbt_update(cur, new.br_startoff, - new.br_startblock, new.br_blockcount, - new.br_state); + error = xfs_bmbt_update(cur, got); if (error) return error; } else { *logflags |= XFS_ILOG_DEXT; } - xfs_iext_update_extent(ifp, *current_ext, &new); - - if (direction == SHIFT_LEFT) - (*current_ext)++; - else - (*current_ext)--; + xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, + got); /* update reverse mapping */ - error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); + error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev); if (error) return error; - return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); + return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got); } -/* - * Shift extent records to the left/right to cover/create a hole. - * - * The maximum number of extents to be shifted in a single operation is - * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the - * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb - * is the length by which each extent is shifted. If there is no hole to shift - * the extents into, this will be considered invalid operation and we abort - * immediately. - */ int -xfs_bmap_shift_extents( +xfs_bmap_collapse_extents( struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - int *done, + bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops, - enum shift_direction direction, - int num_exts) + struct xfs_defer_ops *dfops) { - struct xfs_btree_cur *cur = NULL; - struct xfs_bmbt_irec got; - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; - xfs_extnum_t nexts = 0; - xfs_extnum_t current_ext; - xfs_extnum_t total_extents; - xfs_extnum_t stop_extent; - int error = 0; - int whichfork = XFS_DATA_FORK; - int logflags = 0; + int whichfork = XFS_DATA_FORK; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur = NULL; + struct xfs_bmbt_irec got, prev; + struct xfs_iext_cursor icur; + xfs_fileoff_t new_startoff; + int error = 0; + int logflags = 0; if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmap_shift_extents", - XFS_ERRLEVEL_LOW, mp); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)); - ifp = XFS_IFORK_PTR(ip, whichfork); if (!(ifp->if_flags & XFS_IFEXTENTS)) { - /* Read in all the extents */ error = xfs_iread_extents(tp, ip, whichfork); if (error) return error; @@ -6127,107 +5707,167 @@ xfs_bmap_shift_extents( cur->bc_private.b.flags = 0; } - /* - * There may be delalloc extents in the data fork before the range we - * are collapsing out, so we cannot use the count of real extents here. - * Instead we have to calculate it from the incore fork. - */ - total_extents = xfs_iext_count(ifp); - if (total_extents == 0) { - *done = 1; + if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) { + *done = true; goto del_cursor; } + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); - /* - * In case of first right shift, we need to initialize next_fsb - */ - if (*next_fsb == NULLFSBLOCK) { - ASSERT(direction == SHIFT_RIGHT); - - current_ext = total_extents - 1; - xfs_iext_get_extent(ifp, current_ext, &got); - if (stop_fsb > got.br_startoff) { - *done = 1; + new_startoff = got.br_startoff - offset_shift_fsb; + if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { + if (new_startoff < prev.br_startoff + prev.br_blockcount) { + error = -EINVAL; goto del_cursor; } - *next_fsb = got.br_startoff; + + if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) { + error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb, + &icur, &got, &prev, cur, &logflags, + dfops); + if (error) + goto del_cursor; + goto done; + } } else { - /* - * Look up the extent index for the fsb where we start shifting. We can - * henceforth iterate with current_ext as extent list changes are locked - * out via ilock. - * - * If next_fsb lies in a hole beyond which there are no extents we are - * done. - */ - if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, ¤t_ext, - &got)) { - *done = 1; + if (got.br_startoff < offset_shift_fsb) { + error = -EINVAL; goto del_cursor; } } - /* Lookup the extent index at which we have to stop */ - if (direction == SHIFT_RIGHT) { - struct xfs_bmbt_irec s; + error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur, + &logflags, dfops, new_startoff); + if (error) + goto del_cursor; + +done: + if (!xfs_iext_next_extent(ifp, &icur, &got)) { + *done = true; + goto del_cursor; + } + + *next_fsb = got.br_startoff; +del_cursor: + if (cur) + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + return error; +} + +int +xfs_bmap_insert_extents( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t *next_fsb, + xfs_fileoff_t offset_shift_fsb, + bool *done, + xfs_fileoff_t stop_fsb, + xfs_fsblock_t *firstblock, + struct xfs_defer_ops *dfops) +{ + int whichfork = XFS_DATA_FORK; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur = NULL; + struct xfs_bmbt_irec got, next; + struct xfs_iext_cursor icur; + xfs_fileoff_t new_startoff; + int error = 0; + int logflags = 0; + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT))) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)); - xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s); - /* Make stop_extent exclusive of shift range */ - stop_extent--; - if (current_ext <= stop_extent) { - error = -EIO; + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; + } + + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); + cur->bc_private.b.firstblock = *firstblock; + cur->bc_private.b.dfops = dfops; + cur->bc_private.b.flags = 0; + } + + if (*next_fsb == NULLFSBLOCK) { + xfs_iext_last(ifp, &icur); + if (!xfs_iext_get_extent(ifp, &icur, &got) || + stop_fsb > got.br_startoff) { + *done = true; goto del_cursor; } } else { - stop_extent = total_extents; - if (current_ext >= stop_extent) { - error = -EIO; + if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) { + *done = true; goto del_cursor; } } + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); - while (nexts++ < num_exts) { - error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, - ¤t_ext, &got, cur, &logflags, - direction, dfops); - if (error) + if (stop_fsb >= got.br_startoff + got.br_blockcount) { + error = -EIO; + goto del_cursor; + } + + new_startoff = got.br_startoff + offset_shift_fsb; + if (xfs_iext_peek_next_extent(ifp, &icur, &next)) { + if (new_startoff + got.br_blockcount > next.br_startoff) { + error = -EINVAL; goto del_cursor; - /* - * If there was an extent merge during the shift, the extent - * count can change. Update the total and grade the next record. - */ - if (direction == SHIFT_LEFT) { - total_extents = xfs_iext_count(ifp); - stop_extent = total_extents; } - if (current_ext == stop_extent) { - *done = 1; - *next_fsb = NULLFSBLOCK; - break; - } - xfs_iext_get_extent(ifp, current_ext, &got); + /* + * Unlike a left shift (which involves a hole punch), a right + * shift does not modify extent neighbors in any way. We should + * never find mergeable extents in this scenario. Check anyways + * and warn if we encounter two extents that could be one. + */ + if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb)) + WARN_ON_ONCE(1); } - if (!*done) - *next_fsb = got.br_startoff; + error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur, + &logflags, dfops, new_startoff); + if (error) + goto del_cursor; + if (!xfs_iext_prev_extent(ifp, &icur, &got) || + stop_fsb >= got.br_startoff + got.br_blockcount) { + *done = true; + goto del_cursor; + } + + *next_fsb = got.br_startoff; del_cursor: if (cur) xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - if (logflags) xfs_trans_log_inode(tp, ip, logflags); - return error; } /* - * Splits an extent into two extents at split_fsb block such that it is - * the first block of the current_ext. @current_ext is a target extent - * to be split. @split_fsb is a block where the extents is split. - * If split_fsb lies in a hole or the first block of extents, just return 0. + * Splits an extent into two extents at split_fsb block such that it is the + * first block of the current_ext. @ext is a target extent to be split. + * @split_fsb is a block where the extents is split. If split_fsb lies in a + * hole or the first block of extents, just return 0. */ STATIC int xfs_bmap_split_extent_at( @@ -6244,7 +5884,7 @@ xfs_bmap_split_extent_at( struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; xfs_fsblock_t gotblkcnt; /* new block count for got */ - xfs_extnum_t current_ext; + struct xfs_iext_cursor icur; int error = 0; int logflags = 0; int i = 0; @@ -6272,7 +5912,7 @@ xfs_bmap_split_extent_at( /* * If there are not extents, or split_fsb lies in a hole we are done. */ - if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, ¤t_ext, &got) || + if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) || got.br_startoff >= split_fsb) return 0; @@ -6287,44 +5927,35 @@ xfs_bmap_split_extent_at( cur->bc_private.b.firstblock = *firstfsb; cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto del_cursor; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); } got.br_blockcount = gotblkcnt; - xfs_iext_update_extent(ifp, current_ext, &got); + xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur, + &got); logflags = XFS_ILOG_CORE; if (cur) { - error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state); + error = xfs_bmbt_update(cur, &got); if (error) goto del_cursor; } else logflags |= XFS_ILOG_DEXT; /* Add new extent */ - current_ext++; - xfs_iext_insert(ip, current_ext, 1, &new, 0); + xfs_iext_next(ifp, &icur); + xfs_iext_insert(ip, &icur, &new, 0); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur) { - error = xfs_bmbt_lookup_eq(cur, new.br_startoff, - new.br_startblock, new.br_blockcount, - &i); + error = xfs_bmbt_lookup_eq(cur, &new, &i); if (error) goto del_cursor; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); - cur->bc_rec.b.br_state = new.br_state; - error = xfs_btree_insert(cur, &i); if (error) goto del_cursor; @@ -6528,3 +6159,39 @@ xfs_bmap_finish_one( return error; } + +/* Check that an inode's extent does not have invalid flags or bad ranges. */ +xfs_failaddr_t +xfs_bmap_validate_extent( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_fsblock_t endfsb; + bool isrt; + + isrt = XFS_IS_REALTIME_INODE(ip); + endfsb = irec->br_startblock + irec->br_blockcount - 1; + if (isrt) { + if (!xfs_verify_rtbno(mp, irec->br_startblock)) + return __this_address; + if (!xfs_verify_rtbno(mp, endfsb)) + return __this_address; + } else { + if (!xfs_verify_fsbno(mp, irec->br_startblock)) + return __this_address; + if (!xfs_verify_fsbno(mp, endfsb)) + return __this_address; + if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) != + XFS_FSB_TO_AGNO(mp, endfsb)) + return __this_address; + } + if (irec->br_state != XFS_EXT_NORM) { + if (whichfork != XFS_DATA_FORK) + return __this_address; + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) + return __this_address; + } + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a5dfbc..f3be6416260b 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -43,7 +43,7 @@ struct xfs_bmalloca { xfs_fsblock_t blkno; /* starting block of new extent */ struct xfs_btree_cur *cur; /* btree cursor */ - xfs_extnum_t idx; /* current extent index */ + struct xfs_iext_cursor icur; /* incore extent cursor */ int nallocs;/* number of extents alloc'd */ int logflags;/* flags for transaction logging */ @@ -113,6 +113,9 @@ struct xfs_extent_free_item /* Only convert delalloc space, don't allocate entirely new extents */ #define XFS_BMAPI_DELALLOC 0x400 +/* Only convert unwritten extents, don't allocate new blocks */ +#define XFS_BMAPI_CONVERT_ONLY 0x800 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -124,7 +127,8 @@ struct xfs_extent_free_item { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ { XFS_BMAPI_COWFORK, "COWFORK" }, \ - { XFS_BMAPI_DELALLOC, "DELALLOC" } + { XFS_BMAPI_DELALLOC, "DELALLOC" }, \ + { XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" } static inline int xfs_bmapi_aflag(int w) @@ -183,31 +187,9 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) !isnullstartblock(irec->br_startblock); } -/* - * This macro is used to determine how many extents will be shifted - * in one write transaction. We could require two splits, - * an extent move on the first and an extent merge on the second, - * So it is proper that one extent is shifted inside write transaction - * at a time. - */ -#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 - -enum shift_direction { - SHIFT_LEFT = 0, - SHIFT_RIGHT, -}; - -#ifdef DEBUG -void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, - int whichfork, unsigned long caller_ip); -#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ - xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_) -#else -#define XFS_BMAP_TRACE_EXLIST(ip,c,w) -#endif - void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, @@ -221,8 +203,6 @@ int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, int whichfork); int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); -int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, - int whichfork); int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, struct xfs_bmbt_irec *mval, int *nmap, int flags); @@ -240,20 +220,25 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, - xfs_extnum_t *idx, struct xfs_bmbt_irec *got, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del); +void xfs_bmap_del_extent_cow(struct xfs_inode *ip, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); -void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); uint xfs_default_attroffset(struct xfs_inode *ip); -int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, +int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, + bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, + struct xfs_defer_ops *dfops); +int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops, enum shift_direction direction, - int num_exts); + bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, + struct xfs_defer_ops *dfops); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, - struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); + struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, + int eof); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, @@ -277,4 +262,19 @@ int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); +static inline int xfs_bmap_fork_to_state(int whichfork) +{ + switch (whichfork) { + case XFS_ATTR_FORK: + return BMAP_ATTRFORK; + case XFS_COW_FORK: + return BMAP_COWFORK; + default: + return 0; + } +} + +xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, + struct xfs_bmbt_irec *irec); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index a6331ffa51e3..d89d06bea6e3 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -38,22 +38,6 @@ #include "xfs_rmap.h" /* - * Determine the extent state. - */ -/* ARGSUSED */ -STATIC xfs_exntst_t -xfs_extent_state( - xfs_filblks_t blks, - int extent_flag) -{ - if (extent_flag) { - ASSERT(blks != 0); /* saved for DMIG */ - return XFS_EXT_UNWRITTEN; - } - return XFS_EXT_NORM; -} - -/* * Convert on-disk form of btree root to in-memory form. */ void @@ -87,84 +71,21 @@ xfs_bmdr_to_bmbt( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Convert a compressed bmap extent record to an uncompressed form. - * This code must be in sync with the routines xfs_bmbt_get_startoff, - * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. - */ -STATIC void -__xfs_bmbt_get_all( - uint64_t l0, - uint64_t l1, - xfs_bmbt_irec_t *s) -{ - int ext_flag; - xfs_exntst_t st; - - ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); - s->br_startoff = ((xfs_fileoff_t)l0 & - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; - s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | - (((xfs_fsblock_t)l1) >> 21); - s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); - /* This is xfs_extent_state() in-line */ - if (ext_flag) { - ASSERT(s->br_blockcount != 0); /* saved for DMIG */ - st = XFS_EXT_UNWRITTEN; - } else - st = XFS_EXT_NORM; - s->br_state = st; -} - void -xfs_bmbt_get_all( - xfs_bmbt_rec_host_t *r, - xfs_bmbt_irec_t *s) -{ - __xfs_bmbt_get_all(r->l0, r->l1, s); -} - -/* - * Extract the blockcount field from an in memory bmap extent record. - */ -xfs_filblks_t -xfs_bmbt_get_blockcount( - xfs_bmbt_rec_host_t *r) -{ - return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21)); -} - -/* - * Extract the startblock field from an in memory bmap extent record. - */ -xfs_fsblock_t -xfs_bmbt_get_startblock( - xfs_bmbt_rec_host_t *r) -{ - return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | - (((xfs_fsblock_t)r->l1) >> 21); -} - -/* - * Extract the startoff field from an in memory bmap extent record. - */ -xfs_fileoff_t -xfs_bmbt_get_startoff( - xfs_bmbt_rec_host_t *r) -{ - return ((xfs_fileoff_t)r->l0 & - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -} - -xfs_exntst_t -xfs_bmbt_get_state( - xfs_bmbt_rec_host_t *r) -{ - int ext_flag; - - ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN)); - return xfs_extent_state(xfs_bmbt_get_blockcount(r), - ext_flag); +xfs_bmbt_disk_get_all( + struct xfs_bmbt_rec *rec, + struct xfs_bmbt_irec *irec) +{ + uint64_t l0 = get_unaligned_be64(&rec->l0); + uint64_t l1 = get_unaligned_be64(&rec->l1); + + irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; + irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21); + irec->br_blockcount = l1 & xfs_mask64lo(21); + if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN)) + irec->br_state = XFS_EXT_UNWRITTEN; + else + irec->br_state = XFS_EXT_NORM; } /* @@ -188,142 +109,29 @@ xfs_bmbt_disk_get_startoff( xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } - -/* - * Set all the fields in a bmap extent record from the arguments. - */ -void -xfs_bmbt_set_allf( - xfs_bmbt_rec_host_t *r, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state) -{ - int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; - - ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - - ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); - - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - ((xfs_bmbt_rec_base_t)startblock >> 43); - r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); -} - /* * Set all the fields in a bmap extent record from the uncompressed form. */ void -xfs_bmbt_set_all( - xfs_bmbt_rec_host_t *r, - xfs_bmbt_irec_t *s) -{ - xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock, - s->br_blockcount, s->br_state); -} - - -/* - * Set all the fields in a disk format bmap extent record from the arguments. - */ -void -xfs_bmbt_disk_set_allf( - xfs_bmbt_rec_t *r, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state) -{ - int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; - - ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); - - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - ((xfs_bmbt_rec_base_t)startblock >> 43)); - r->l1 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); -} - -/* - * Set all the fields in a bmap extent record from the uncompressed form. - */ -STATIC void xfs_bmbt_disk_set_all( - xfs_bmbt_rec_t *r, - xfs_bmbt_irec_t *s) + struct xfs_bmbt_rec *r, + struct xfs_bmbt_irec *s) { - xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock, - s->br_blockcount, s->br_state); -} + int extent_flag = (s->br_state != XFS_EXT_NORM); -/* - * Set the blockcount field in a bmap extent record. - */ -void -xfs_bmbt_set_blockcount( - xfs_bmbt_rec_host_t *r, - xfs_filblks_t v) -{ - ASSERT((v & xfs_mask64hi(43)) == 0); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) | - (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21)); -} + ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN); + ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN))); + ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN))); + ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN))); -/* - * Set the startblock field in a bmap extent record. - */ -void -xfs_bmbt_set_startblock( - xfs_bmbt_rec_host_t *r, - xfs_fsblock_t v) -{ - ASSERT((v & xfs_mask64hi(12)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | - (xfs_bmbt_rec_base_t)(v >> 43); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | - (xfs_bmbt_rec_base_t)(v << 21); -} - -/* - * Set the startoff field in a bmap extent record. - */ -void -xfs_bmbt_set_startoff( - xfs_bmbt_rec_host_t *r, - xfs_fileoff_t v) -{ - ASSERT((v & xfs_mask64hi(9)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) | - ((xfs_bmbt_rec_base_t)v << 9) | - (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); -} - -/* - * Set the extent state field in a bmap extent record. - */ -void -xfs_bmbt_set_state( - xfs_bmbt_rec_host_t *r, - xfs_exntst_t v) -{ - ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN); - if (v == XFS_EXT_NORM) - r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); - else - r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN); + put_unaligned_be64( + ((xfs_bmbt_rec_base_t)extent_flag << 63) | + ((xfs_bmbt_rec_base_t)s->br_startoff << 9) | + ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0); + put_unaligned_be64( + ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | + ((xfs_bmbt_rec_base_t)s->br_blockcount & + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1); } /* @@ -464,10 +272,10 @@ xfs_bmbt_alloc_block( cur->bc_private.b.dfops->dop_low = true; } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } + ASSERT(args.len == 1); cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; @@ -478,12 +286,10 @@ xfs_bmbt_alloc_block( new->l = cpu_to_be64(args.fsbno); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -617,33 +423,29 @@ xfs_bmbt_diff_two_keys( be64_to_cpu(k2->bmbt.br_startoff); } -static bool +static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; switch (block->bb_magic) { case cpu_to_be32(XFS_BMAP_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) - return false; /* * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. */ - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) - return false; + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_BMAP_MAGIC): break; default: - return false; + return __this_address; } /* @@ -655,46 +457,39 @@ xfs_bmbt_verify( */ level = be16_to_cpu(block->bb_level); if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) - return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) - return false; - if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) - return false; - - return true; + return __this_address; + + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } static void xfs_bmbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_bmbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_bmbt_write_verify( struct xfs_buf *bp) { - if (!xfs_bmbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_bmbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_lblock_calc_crc(bp); @@ -704,6 +499,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = { .name = "xfs_bmbt", .verify_read = xfs_bmbt_read_verify, .verify_write = xfs_bmbt_write_verify, + .verify_struct = xfs_bmbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 9da5a8d4f184..e4505746ccaa 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -98,25 +98,11 @@ struct xfs_trans; */ extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int, struct xfs_btree_block *, int); -extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); -extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); -extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); -extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); -extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); +void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); - -extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); -extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, - xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); -extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v); -extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v); -extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); -extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); - -extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, - xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); +extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); @@ -132,18 +118,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -/* - * Check that the extent does not contain an invalid unwritten extent flag. - */ -static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork, - struct xfs_bmbt_rec_host *ep) -{ - if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0) - return true; - if (whichfork == XFS_DATA_FORK && - xfs_sb_version_hasextflgbit(&mp->m_sb)) - return true; - return false; -} - #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5bfb88261c7e..edc0193358a5 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -29,6 +29,7 @@ #include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_btree.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" @@ -63,44 +64,63 @@ xfs_btree_magic( return magic; } -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_block *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp) /* buffer for block, if any */ +/* + * Check a long btree block header. Return the address of the failing check, + * or NULL if everything is ok. + */ +xfs_failaddr_t +__xfs_btree_check_lblock( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level, + struct xfs_buf *bp) { - int lblock_ok = 1; /* block passes checks */ - struct xfs_mount *mp; /* file system mount point */ + struct xfs_mount *mp = cur->bc_mp; xfs_btnum_t btnum = cur->bc_btnum; - int crc; - - mp = cur->bc_mp; - crc = xfs_sb_version_hascrc(&mp->m_sb); + int crc = xfs_sb_version_hascrc(&mp->m_sb); if (crc) { - lblock_ok = lblock_ok && - uuid_equal(&block->bb_u.l.bb_uuid, - &mp->m_sb.sb_meta_uuid) && - block->bb_u.l.bb_blkno == cpu_to_be64( - bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (block->bb_u.l.bb_blkno != + cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) + return __this_address; + if (block->bb_u.l.bb_pad != cpu_to_be32(0)) + return __this_address; } - lblock_ok = lblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - cur->bc_ops->get_maxrecs(cur, level) && - block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || - XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_leftsib))) && - block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || - XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_rightsib))); - - if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, + if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) + return __this_address; + if (be16_to_cpu(block->bb_level) != level) + return __this_address; + if (be16_to_cpu(block->bb_numrecs) > + cur->bc_ops->get_maxrecs(cur, level)) + return __this_address; + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib), + level + 1)) + return __this_address; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib), + level + 1)) + return __this_address; + + return NULL; +} + +/* Check a long btree block header. */ +static int +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_failaddr_t fa; + + fa = __xfs_btree_check_lblock(cur, block, level, bp); + if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -110,48 +130,61 @@ xfs_btree_check_lblock( return 0; } -STATIC int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_block *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp) /* buffer containing block */ +/* + * Check a short btree block header. Return the address of the failing check, + * or NULL if everything is ok. + */ +xfs_failaddr_t +__xfs_btree_check_sblock( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level, + struct xfs_buf *bp) { - struct xfs_mount *mp; /* file system mount point */ - struct xfs_buf *agbp; /* buffer for ag. freespace struct */ - struct xfs_agf *agf; /* ag. freespace structure */ - xfs_agblock_t agflen; /* native ag. freespace length */ - int sblock_ok = 1; /* block passes checks */ + struct xfs_mount *mp = cur->bc_mp; xfs_btnum_t btnum = cur->bc_btnum; - int crc; - - mp = cur->bc_mp; - crc = xfs_sb_version_hascrc(&mp->m_sb); - agbp = cur->bc_private.a.agbp; - agf = XFS_BUF_TO_AGF(agbp); - agflen = be32_to_cpu(agf->agf_length); + int crc = xfs_sb_version_hascrc(&mp->m_sb); if (crc) { - sblock_ok = sblock_ok && - uuid_equal(&block->bb_u.s.bb_uuid, - &mp->m_sb.sb_meta_uuid) && - block->bb_u.s.bb_blkno == cpu_to_be64( - bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (block->bb_u.s.bb_blkno != + cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) + return __this_address; } - sblock_ok = sblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - cur->bc_ops->get_maxrecs(cur, level) && - (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || - be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && - block->bb_u.s.bb_leftsib && - (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || - be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && - block->bb_u.s.bb_rightsib; - - if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, + if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) + return __this_address; + if (be16_to_cpu(block->bb_level) != level) + return __this_address; + if (be16_to_cpu(block->bb_numrecs) > + cur->bc_ops->get_maxrecs(cur, level)) + return __this_address; + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib), + level + 1)) + return __this_address; + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib), + level + 1)) + return __this_address; + + return NULL; +} + +/* Check a short btree block header. */ +STATIC int +xfs_btree_check_sblock( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_failaddr_t fa; + + fa = __xfs_btree_check_sblock(cur, block, level, bp); + if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -177,59 +210,53 @@ xfs_btree_check_block( return xfs_btree_check_sblock(cur, block, level, bp); } -/* - * Check that (long) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ +/* Check that this long pointer is valid and points within the fs. */ +bool xfs_btree_check_lptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_fsblock_t bno, /* btree block disk address */ - int level) /* btree block level */ + struct xfs_btree_cur *cur, + xfs_fsblock_t fsbno, + int level) { - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, - level > 0 && - bno != NULLFSBLOCK && - XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); - return 0; + if (level <= 0) + return false; + return xfs_verify_fsbno(cur->bc_mp, fsbno); } -#ifdef DEBUG -/* - * Check that (short) pointer is ok. - */ -STATIC int /* error (0 or EFSCORRUPTED) */ +/* Check that this short pointer is valid and points within the AG. */ +bool xfs_btree_check_sptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t bno, /* btree block disk address */ - int level) /* btree block level */ + struct xfs_btree_cur *cur, + xfs_agblock_t agbno, + int level) { - xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; - - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, - level > 0 && - bno != NULLAGBLOCK && - bno != 0 && - bno < agblocks); - return 0; + if (level <= 0) + return false; + return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); } +#ifdef DEBUG /* - * Check that block ptr is ok. + * Check that a given (indexed) btree pointer at a certain level of a + * btree is valid and doesn't point past where it should. */ -STATIC int /* error (0 or EFSCORRUPTED) */ +static int xfs_btree_check_ptr( - struct xfs_btree_cur *cur, /* btree cursor */ - union xfs_btree_ptr *ptr, /* btree block disk address */ - int index, /* offset from ptr to check */ - int level) /* btree block level */ + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int index, + int level) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - return xfs_btree_check_lptr(cur, - be64_to_cpu((&ptr->l)[index]), level); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, + xfs_btree_check_lptr(cur, + be64_to_cpu((&ptr->l)[index]), level)); } else { - return xfs_btree_check_sptr(cur, - be32_to_cpu((&ptr->s)[index]), level); + XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, + xfs_btree_check_sptr(cur, + be32_to_cpu((&ptr->s)[index]), level)); } + + return 0; } #endif @@ -246,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -284,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -302,7 +329,7 @@ xfs_btree_sblock_verify_crc( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) - return false; + return __this_address; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } @@ -826,7 +853,7 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + if (!xfs_verify_fsbno(mp, fsbno)) return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, @@ -1027,7 +1054,7 @@ xfs_btree_setbuf( } } -STATIC int +bool xfs_btree_ptr_is_null( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) @@ -1052,7 +1079,7 @@ xfs_btree_set_ptr_null( /* * Get/set/init sibling pointers */ -STATIC void +void xfs_btree_get_sibling( struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -1411,8 +1438,6 @@ xfs_btree_log_keys( int first, int last) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); if (bp) { xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); @@ -1423,8 +1448,6 @@ xfs_btree_log_keys( xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1437,15 +1460,12 @@ xfs_btree_log_recs( int first, int last) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, xfs_btree_rec_offset(cur, first), xfs_btree_rec_offset(cur, last + 1) - 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1458,8 +1478,6 @@ xfs_btree_log_ptrs( int first, /* index of first pointer to log */ int last) /* index of last pointer to log */ { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); if (bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -1474,7 +1492,6 @@ xfs_btree_log_ptrs( xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1516,9 +1533,6 @@ xfs_btree_log_block( XFS_BTREE_LBLOCK_CRC_LEN }; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGBI(cur, bp, fields); - if (bp) { int nbits; @@ -1546,8 +1560,6 @@ xfs_btree_log_block( xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, xfs_ilog_fbroot(cur->bc_private.b.whichfork)); } - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } /* @@ -1566,9 +1578,6 @@ xfs_btree_increment( int error; /* error return value */ int lev; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); /* Read-ahead to the right at this level. */ @@ -1644,17 +1653,14 @@ xfs_btree_increment( cur->bc_ptrs[lev] = 1; } out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -1674,9 +1680,6 @@ xfs_btree_decrement( int lev; union xfs_btree_ptr ptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); /* Read-ahead to the left at this level. */ @@ -1742,17 +1745,14 @@ xfs_btree_decrement( cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); } out1: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -1854,9 +1854,6 @@ xfs_btree_lookup( union xfs_btree_ptr *pp; /* ptr to btree block */ union xfs_btree_ptr ptr; /* ptr to btree block */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, dir); - XFS_BTREE_STATS_INC(cur, lookup); /* No such thing as a zero-level tree. */ @@ -1902,7 +1899,6 @@ xfs_btree_lookup( ASSERT(level == 0 && cur->bc_nlevels == 1); cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -1977,7 +1973,6 @@ xfs_btree_lookup( if (error) goto error0; XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } @@ -1992,16 +1987,14 @@ xfs_btree_lookup( *stat = 1; else *stat = 0; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } /* Find the high key storage area from a regular key. */ -STATIC union xfs_btree_key * +union xfs_btree_key * xfs_btree_high_key_from_key( struct xfs_btree_cur *cur, union xfs_btree_key *key) @@ -2075,7 +2068,7 @@ xfs_btree_get_node_keys( } /* Derive the keys for any btree block. */ -STATIC void +void xfs_btree_get_keys( struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -2142,10 +2135,8 @@ __xfs_btree_updkeys( trace_xfs_btree_updkeys(cur, level, bp); #ifdef DEBUG error = xfs_btree_check_block(cur, block, level, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } #endif ptr = cur->bc_ptrs[level]; nlkey = xfs_btree_key_addr(cur, ptr, block); @@ -2197,9 +2188,6 @@ xfs_btree_update_keys( if (cur->bc_flags & XFS_BTREE_OVERLAPPING) return __xfs_btree_updkeys(cur, level, block, bp, false); - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIK(cur, level, keyp); - /* * Go up the tree from this level toward the root. * At each level, update the key value to the value input. @@ -2214,10 +2202,8 @@ xfs_btree_update_keys( block = xfs_btree_get_block(cur, level, &bp); #ifdef DEBUG error = xfs_btree_check_block(cur, block, level, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } #endif ptr = cur->bc_ptrs[level]; kp = xfs_btree_key_addr(cur, ptr, block); @@ -2225,7 +2211,6 @@ xfs_btree_update_keys( xfs_btree_log_keys(cur, bp, ptr, ptr); } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -2245,9 +2230,6 @@ xfs_btree_update( int ptr; union xfs_btree_rec *rp; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGR(cur, rec); - /* Pick up the current block. */ block = xfs_btree_get_block(cur, 0, &bp); @@ -2280,11 +2262,9 @@ xfs_btree_update( goto error0; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -2312,9 +2292,6 @@ xfs_btree_lshift( int error; /* error return value */ int i; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) goto out0; @@ -2473,21 +2450,17 @@ xfs_btree_lshift( /* Slide the cursor value left one. */ cur->bc_ptrs[level]--; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; error1: - XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -2514,9 +2487,6 @@ xfs_btree_rshift( int error; /* error return value */ int i; /* loop counter */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && (level == cur->bc_nlevels - 1)) goto out0; @@ -2649,21 +2619,17 @@ xfs_btree_rshift( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; error1: - XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -2699,9 +2665,6 @@ __xfs_btree_split( int i; #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); - XFS_BTREE_STATS_INC(cur, split); /* Set up left block (current one). */ @@ -2851,16 +2814,13 @@ __xfs_btree_split( (*curp)->bc_ptrs[level + 1]++; } *ptrp = rptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -2967,7 +2927,6 @@ xfs_btree_new_iroot( int i; /* loop counter */ #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, newroot); ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); @@ -2981,10 +2940,9 @@ xfs_btree_new_iroot( error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); if (error) goto error0; - if (*stat == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + if (*stat == 0) return 0; - } + XFS_BTREE_STATS_INC(cur, alloc); /* Copy the root into a real block. */ @@ -3047,10 +3005,8 @@ xfs_btree_new_iroot( *logflags |= XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); *stat = 1; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3075,7 +3031,6 @@ xfs_btree_new_root( union xfs_btree_ptr rptr; union xfs_btree_ptr lptr; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, newroot); /* initialise our start point from the cursor */ @@ -3175,14 +3130,11 @@ xfs_btree_new_root( xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); cur->bc_ptrs[cur->bc_nlevels] = nptr; cur->bc_nlevels++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3203,7 +3155,7 @@ xfs_btree_make_block_unfull( if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) { - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_private.b.ip; if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { /* A root block that can be made bigger. */ @@ -3282,9 +3234,6 @@ xfs_btree_insrec( #endif xfs_daddr_t old_bn; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); - ncur = NULL; lkey = &nkey; @@ -3297,14 +3246,12 @@ xfs_btree_insrec( error = xfs_btree_new_root(cur, stat); xfs_btree_set_ptr_null(cur, ptrp); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return error; } /* If we're off the left edge, return failure. */ ptr = cur->bc_ptrs[level]; if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3462,12 +3409,10 @@ xfs_btree_insrec( *curp = ncur; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3545,11 +3490,9 @@ xfs_btree_insert( } } while (!xfs_btree_ptr_is_null(cur, &nptr)); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = i; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -3584,8 +3527,6 @@ xfs_btree_kill_iroot( int i; #endif - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); ASSERT(cur->bc_nlevels > 1); @@ -3643,19 +3584,15 @@ xfs_btree_kill_iroot( #ifdef DEBUG for (i = 0; i < numrecs; i++) { error = xfs_btree_check_ptr(cur, cpp, i, level - 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } } #endif xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); error = xfs_btree_free_block(cur, cbp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); @@ -3663,7 +3600,6 @@ xfs_btree_kill_iroot( XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); cur->bc_nlevels--; out0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -3679,7 +3615,6 @@ xfs_btree_kill_root( { int error; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_STATS_INC(cur, killroot); /* @@ -3689,16 +3624,13 @@ xfs_btree_kill_root( cur->bc_ops->set_root(cur, newroot, -1); error = xfs_btree_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } cur->bc_bufs[level] = NULL; cur->bc_ra[level] = 0; cur->bc_nlevels--; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } @@ -3717,7 +3649,6 @@ xfs_btree_dec_cursor( return error; } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; } @@ -3753,15 +3684,11 @@ xfs_btree_delrec( struct xfs_btree_cur *tcur; /* temporary btree cursor */ int numrecs; /* temporary numrec count */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_TRACE_ARGI(cur, level); - tcur = NULL; /* Get the index of the entry being deleted, check for nothing there. */ ptr = cur->bc_ptrs[level]; if (ptr == 0) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -3778,7 +3705,6 @@ xfs_btree_delrec( /* Fail if we're off the end of the block. */ if (ptr > numrecs) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -4053,7 +3979,7 @@ xfs_btree_delrec( tcur = NULL; if (level == 0) cur->bc_ptrs[0]++; - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; return 0; } @@ -4223,13 +4149,11 @@ xfs_btree_delrec( * call updkeys directly. */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); /* Return value means the next level up has something to do. */ *stat = 2; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); if (tcur) xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; @@ -4250,8 +4174,6 @@ xfs_btree_delete( int i; bool joined = false; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* * Go up the tree, starting at leaf level. * @@ -4287,11 +4209,9 @@ xfs_btree_delete( } } - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = i; return 0; error0: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -4502,6 +4422,51 @@ xfs_btree_change_owner( &bbcoi); } +/* Verify the v5 fields of a long-format btree block. */ +xfs_failaddr_t +xfs_btree_lblock_v5hdr_verify( + struct xfs_buf *bp, + uint64_t owner) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) + return __this_address; + if (owner != XFS_RMAP_OWN_UNKNOWN && + be64_to_cpu(block->bb_u.l.bb_owner) != owner) + return __this_address; + return NULL; +} + +/* Verify a long-format btree block. */ +xfs_failaddr_t +xfs_btree_lblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) + return __this_address; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) + return __this_address; + + return NULL; +} + /** * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format * btree block @@ -4510,7 +4475,7 @@ xfs_btree_change_owner( * @max_recs: pointer to the m_*_mxr max records field in the xfs mount * @pag_max_level: pointer to the per-ag max level field */ -bool +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { @@ -4519,14 +4484,14 @@ xfs_btree_sblock_v5hdr_verify( struct xfs_perag *pag = bp->b_pag; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; + return __this_address; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - return true; + return __this_address; + return NULL; } /** @@ -4535,29 +4500,29 @@ xfs_btree_sblock_v5hdr_verify( * @bp: buffer containing the btree block * @max_recs: maximum records allowed in this btree node */ -bool +xfs_failaddr_t xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_agblock_t agno; /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) - return false; + return __this_address; /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) - return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) - return false; - - return true; + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) + return __this_address; + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) + return __this_address; + + return NULL; } /* @@ -4914,3 +4879,45 @@ xfs_btree_count_blocks( return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, blocks); } + +/* Compare two btree pointers. */ +int64_t +xfs_btree_diff_two_ptrs( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *a, + const union xfs_btree_ptr *b) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); + return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); +} + +/* If there's an extent, we're done. */ +STATIC int +xfs_btree_has_record_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* Is there a record covering a given range of keys? */ +int +xfs_btree_has_record( + struct xfs_btree_cur *cur, + union xfs_btree_irec *low, + union xfs_btree_irec *high, + bool *exists) +{ + int error; + + error = xfs_btree_query_range(cur, low, high, + &xfs_btree_has_record_helper, NULL); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + *exists = true; + return 0; + } + *exists = false; + return error; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index f2a88c3b1159..58e30c0975c3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -255,6 +255,14 @@ typedef struct xfs_btree_cur */ #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) +/* + * Internal long and short btree block checks. They return NULL if the + * block is ok or the address of the failed check otherwise. + */ +xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, int level, struct xfs_buf *bp); +xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, int level, struct xfs_buf *bp); /* * Check that block header is ok. @@ -269,10 +277,19 @@ xfs_btree_check_block( /* * Check that (long) pointer is ok. */ -int /* error (0 or EFSCORRUPTED) */ +bool /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_fsblock_t ptr, /* btree block disk address */ + xfs_fsblock_t fsbno, /* btree block disk address */ + int level); /* btree block level */ + +/* + * Check that (short) pointer is ok. + */ +bool /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t agbno, /* btree block disk address */ int level); /* btree block level */ /* @@ -456,31 +473,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -#define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); +xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, + unsigned int max_recs); +xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, + uint64_t owner); +xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, + unsigned int max_recs); -/* - * Trace hooks. Currently not implemented as they need to be ported - * over to the generic tracing functionality, which is some effort. - * - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) - -bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); -bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, unsigned long len); xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, @@ -517,5 +517,18 @@ int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level, union xfs_btree_ptr *pp, struct xfs_btree_block **blkp); struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur, int level, struct xfs_buf **bpp); +bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr); +int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *a, + const union xfs_btree_ptr *b); +void xfs_btree_get_sibling(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, int lr); +void xfs_btree_get_keys(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, union xfs_btree_key *key); +union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, + union xfs_btree_key *key); +int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, + union xfs_btree_irec *high, bool *exists); #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h index 8211f48b98e6..999a290cfd72 100644 --- a/fs/xfs/libxfs/xfs_cksum.h +++ b/fs/xfs/libxfs/xfs_cksum.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _XFS_CKSUM_H #define _XFS_CKSUM_H 1 diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 6d4335815c3f..ea187b4a7991 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -static bool +static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { @@ -145,24 +145,24 @@ xfs_da3_node_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_DA3_NODE_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) - return false; + return __this_address; } if (ichdr.level == 0) - return false; + return __this_address; if (ichdr.level > XFS_DA_NODE_MAXDEPTH) - return false; + return __this_address; if (ichdr.count == 0) - return false; + return __this_address; /* * we don't know if the node is for and attribute or directory tree, @@ -170,11 +170,11 @@ xfs_da3_node_verify( */ if (ichdr.count > mp->m_dir_geo->node_ents && ichdr.count > mp->m_attr_geo->node_ents) - return false; + return __this_address; /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -182,12 +182,13 @@ xfs_da3_node_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_da3_node_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -211,19 +212,20 @@ xfs_da3_node_read_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; + xfs_failaddr_t fa; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, + __this_address); break; } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; - } + fa = xfs_da3_node_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -236,18 +238,40 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } +} - /* corrupt block */ - xfs_verifier_error(bp); +/* Verify the structure of a da3 block. */ +static xfs_failaddr_t +xfs_da3_node_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: + case XFS_DA_NODE_MAGIC: + return xfs_da3_node_verify(bp); + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + bp->b_ops = &xfs_attr3_leaf_buf_ops; + return bp->b_ops->verify_struct(bp); + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + bp->b_ops = &xfs_dir3_leafn_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + return __this_address; + } } const struct xfs_buf_ops xfs_da3_node_buf_ops = { .name = "xfs_da3_node", .verify_read = xfs_da3_node_read_verify, .verify_write = xfs_da3_node_write_verify, + .verify_struct = xfs_da3_node_verify_struct, }; int @@ -1466,6 +1490,7 @@ xfs_da3_node_lookup_int( int max; int error; int retval; + unsigned int expected_level = 0; struct xfs_inode *dp = state->args->dp; args = state->args; @@ -1474,7 +1499,7 @@ xfs_da3_node_lookup_int( * Descend thru the B-tree searching each level for the right * node to use, until the right hashval is found. */ - blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; + blkno = args->geo->leafblk; for (blk = &state->path.blk[0], state->path.active = 1; state->path.active <= XFS_DA_NODE_MAXDEPTH; blk++, state->path.active++) { @@ -1517,6 +1542,18 @@ xfs_da3_node_lookup_int( dp->d_ops->node_hdr_from_disk(&nodehdr, node); btree = dp->d_ops->node_tree_p(node); + /* Tree taller than we can handle; bail out! */ + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) + return -EFSCORRUPTED; + + /* Check the level from the root. */ + if (blkno == args->geo->leafblk) + expected_level = nodehdr.level - 1; + else if (expected_level != nodehdr.level) + return -EFSCORRUPTED; + else + expected_level--; + max = nodehdr.count; blk->hashval = be32_to_cpu(btree[max - 1].hashval); @@ -1562,8 +1599,15 @@ xfs_da3_node_lookup_int( blk->index = probe; blkno = be32_to_cpu(btree[probe].before); } + + /* We can't point back to the root. */ + if (blkno == args->geo->leafblk) + return -EFSCORRUPTED; } + if (expected_level != 0) + return -EFSCORRUPTED; + /* * A leaf block that ends in the hashval that we are interested in * (final hashval == search hashval) means that the next block may diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 3771edcb301d..7e77299b7789 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_attr3_rmt_hdr) : 0)) +/* Number of bytes in a directory block. */ +static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) +{ + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); +} + #endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. */ struct xfs_defer_op_type { diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index ccf9783fd3f0..92f94e190f04 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -30,6 +30,8 @@ #include "xfs_bmap.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" +#include "xfs_ialloc.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -38,7 +40,9 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* * Convert inode mode to directory entry filetype */ -unsigned char xfs_mode_to_ftype(int mode) +unsigned char +xfs_mode_to_ftype( + int mode) { switch (mode & S_IFMT) { case S_IFREG: @@ -115,8 +119,7 @@ xfs_da_mount( ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= - XFS_MAX_BLOCKSIZE); + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); @@ -136,7 +139,7 @@ xfs_da_mount( dageo = mp->m_dir_geo; dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; /* @@ -202,22 +205,8 @@ xfs_dir_ino_validate( xfs_mount_t *mp, xfs_ino_t ino) { - xfs_agblock_t agblkno; - xfs_agino_t agino; - xfs_agnumber_t agno; - int ino_ok; - int ioff; - - agno = XFS_INO_TO_AGNO(mp, ino); - agblkno = XFS_INO_TO_AGBNO(mp, ino); - ioff = XFS_INO_TO_OFFSET(mp, ino); - agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); - ino_ok = - agno < mp->m_sb.sb_agcount && - agblkno < mp->m_sb.sb_agblocks && - agblkno != 0 && - ioff < (1 << mp->m_sb.sb_inopblog) && - XFS_AGINO_TO_INO(mp, agno, agino) == ino; + bool ino_ok = xfs_verify_dir_ino(mp, ino); + if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) { xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 21c8f8bf94d5..989e95a53db2 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -173,7 +173,7 @@ extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, extern void xfs_dir2_data_make_free(struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_da_args *args, +extern int xfs_dir2_data_use_free(struct xfs_da_args *args, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); @@ -324,4 +324,23 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) sizeof(struct xfs_dir2_leaf_tail)); } +/* + * The Linux API doesn't pass down the total size of the buffer + * we read into down to the filesystem. With the filldir concept + * it's not needed for correct information, but the XFS dir2 leaf + * code wants an estimate of the buffer size to calculate it's + * readahead window and size the buffers used for mapping to + * physical blocks. + * + * Try to give it an estimate that's good enough, maybe at some + * point we can change the ->readdir prototype to include the + * buffer size. For now we use the current glibc buffer size. + * musl libc hardcodes 2k and dietlibc uses PAGE_SIZE. + */ +#define XFS_READDIR_BUFSIZE (32768) + +unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); +void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr); + #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 43c902f7a68d..875893ded514 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -58,7 +58,7 @@ xfs_dir_startup(void) xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } -static bool +static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { @@ -67,20 +67,18 @@ xfs_dir3_block_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } static void @@ -88,15 +86,16 @@ xfs_dir3_block_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_block_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -104,12 +103,13 @@ xfs_dir3_block_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_block_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .name = "xfs_dir3_block", .verify_read = xfs_dir3_block_read_verify, .verify_write = xfs_dir3_block_write_verify, + .verify_struct = xfs_dir3_block_verify, }; int @@ -450,15 +451,19 @@ xfs_dir2_block_addname( * No stale entries, will use enddup space to hold new leaf. */ if (!btp->stale) { + xfs_dir2_data_aoff_t aoff; + /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(args, bp, enddup, - (xfs_dir2_data_aoff_t) - ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - - sizeof(*blp)), - (xfs_dir2_data_aoff_t)sizeof(*blp), - &needlog, &needscan); + aoff = (xfs_dir2_data_aoff_t)((char *)enddup - (char *)hdr + + be16_to_cpu(enddup->length) - sizeof(*blp)); + error = xfs_dir2_data_use_free(args, bp, enddup, aoff, + (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, + &needscan); + if (error) + return error; + /* * Update the tail (entry count). */ @@ -540,9 +545,11 @@ xfs_dir2_block_addname( /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + if (error) + return error; /* * Create the new data entry. */ @@ -996,8 +1003,10 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, - &needlog, &needscan); + error = xfs_dir2_data_use_free(args, dbp, dup, + args->geo->blksize - size, size, &needlog, &needscan); + if (error) + return error; /* * Initialize the block tail. */ @@ -1109,18 +1118,14 @@ xfs_dir2_sf_to_block( * Add block 0 to the inode. */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); - if (error) { - kmem_free(sfp); - return error; - } + if (error) + goto out_free; /* * Initialize the data block, then convert it to block format. */ error = xfs_dir3_data_init(args, blkno, &bp); - if (error) { - kmem_free(sfp); - return error; - } + if (error) + goto out_free; xfs_dir3_block_init(mp, tp, bp, dp); hdr = bp->b_addr; @@ -1135,8 +1140,10 @@ xfs_dir2_sf_to_block( */ dup = dp->d_ops->data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, - i, &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, + i, &needlog, &needscan); + if (error) + goto out_free; ASSERT(needscan == 0); /* * Fill in the tail. @@ -1149,9 +1156,11 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(args, bp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), - be16_to_cpu(dup->length), &needlog, &needscan); + error = xfs_dir2_data_use_free(args, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + be16_to_cpu(dup->length), &needlog, &needscan); + if (error) + goto out_free; /* * Create entry for . */ @@ -1255,4 +1264,7 @@ xfs_dir2_sf_to_block( xfs_dir2_block_log_tail(tp, bp); xfs_dir3_data_check(dp, bp); return 0; +out_free: + kmem_free(sfp); + return error; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 8727a43115ef..cb67ec730b9b 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -36,9 +36,9 @@ /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Return 0 is the buffer is good, otherwise an error. + * Return NULL if the buffer is good, otherwise the address of the error. */ -int +xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ @@ -73,6 +73,14 @@ __xfs_dir3_data_check( */ ops = xfs_dir_get_ops(mp, dp); + /* + * If this isn't a directory, or we don't get handed the dir ops, + * something is seriously wrong. Bail out. + */ + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || + ops != xfs_dir_get_ops(mp, NULL)) + return __this_address; + hdr = bp->b_addr; p = (char *)ops->data_entry_p(hdr); @@ -81,7 +89,6 @@ __xfs_dir3_data_check( case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); - endp = (char *)lep; /* * The number of leaf entries is limited by the size of the @@ -90,17 +97,19 @@ __xfs_dir3_data_check( * so just ensure that the count falls somewhere inside the * block right now. */ - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); + if (be32_to_cpu(btp->count) >= + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) + return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + geo->blksize; break; default: - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; + return __this_address; } + endp = xfs_dir3_data_endp(geo, hdr); + if (!endp) + return __this_address; /* * Account for zero bestfree entries. @@ -108,22 +117,25 @@ __xfs_dir3_data_check( bf = ops->data_bestfree_p(hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); + if (bf[0].offset) + return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); + if (bf[1].offset) + return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); + if (bf[2].offset) + return __this_address; freeseen |= 1 << 2; } - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= - be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) + return __this_address; + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) + return __this_address; /* * Loop over the data/unused entries. */ @@ -135,22 +147,23 @@ __xfs_dir3_data_check( * doesn't need to be there. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + be16_to_cpu(dup->length)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); + if (lastfree != 0) + return __this_address; + if (endp < p + be16_to_cpu(dup->length)) + return __this_address; + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + (char *)dup - (char *)hdr) + return __this_address; dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN(mp, - (freeseen & (1 << i)) == 0); + if ((freeseen & (1 << i)) != 0) + return __this_address; freeseen |= 1 << i; } else { - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(dup->length) > + be16_to_cpu(bf[2].length)) + return __this_address; } p += be16_to_cpu(dup->length); lastfree = 1; @@ -163,16 +176,17 @@ __xfs_dir3_data_check( * The linear search is crude but this is DEBUG code. */ dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN(mp, - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + ops->data_entsize(dep->namelen)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*ops->data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN(mp, - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + if (dep->namelen == 0) + return __this_address; + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) + return __this_address; + if (endp < p + ops->data_entsize(dep->namelen)) + return __this_address; + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != + (char *)dep - (char *)hdr) + return __this_address; + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) + return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -188,34 +202,52 @@ __xfs_dir3_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - XFS_WANT_CORRUPTED_RETURN(mp, - i < be32_to_cpu(btp->count)); + if (i >= be32_to_cpu(btp->count)) + return __this_address; } p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. */ - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); + if (freeseen != 7) + return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; - if (i > 0) - XFS_WANT_CORRUPTED_RETURN(mp, - be32_to_cpu(lep[i].hashval) >= - be32_to_cpu(lep[i - 1].hashval)); + if (i > 0 && be32_to_cpu(lep[i].hashval) < + be32_to_cpu(lep[i - 1].hashval)) + return __this_address; } - XFS_WANT_CORRUPTED_RETURN(mp, count == - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) + return __this_address; + if (stale != be32_to_cpu(btp->stale)) + return __this_address; } - return 0; + return NULL; } -static bool +#ifdef DEBUG +void +xfs_dir3_data_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = __xfs_dir3_data_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} +#endif + +static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { @@ -224,20 +256,18 @@ xfs_dir3_data_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } /* @@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } @@ -274,15 +303,16 @@ xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_data_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -290,12 +320,13 @@ xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_data_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, + .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { @@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int( struct xfs_dir2_data_hdr *hdr, int *loghead) { - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ xfs_dir2_data_unused_t *dup; /* unused data entry */ struct xfs_dir2_data_free *bf; @@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int( * Set up pointers. */ p = (char *)ops->data_entry_p(hdr); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(geo, hdr); - endp = (char *)xfs_dir2_block_leaf_p(btp); - } else - endp = (char *)hdr + geo->blksize; + endp = xfs_dir3_data_endp(geo, hdr); /* * Loop over the block's entries. */ @@ -755,17 +781,9 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + args->geo->blksize; - else { - xfs_dir2_block_tail_t *btp; /* block tail */ + endptr = xfs_dir3_data_endp(args->geo, hdr); + ASSERT(endptr != NULL); - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. @@ -914,10 +932,51 @@ xfs_dir2_data_make_free( *needscanp = needscan; } +/* Check our free data for obvious signs of corruption. */ +static inline xfs_failaddr_t +xfs_dir2_data_check_free( + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_unused *dup, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len) +{ + if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) && + hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) + return __this_address; + if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG) + return __this_address; + if (offset < (char *)dup - (char *)hdr) + return __this_address; + if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr) + return __this_address; + if ((char *)dup - (char *)hdr != + be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))) + return __this_address; + return NULL; +} + +/* Sanity-check a new bestfree entry. */ +static inline xfs_failaddr_t +xfs_dir2_data_check_new_free( + struct xfs_dir2_data_hdr *hdr, + struct xfs_dir2_data_free *dfp, + struct xfs_dir2_data_unused *newdup) +{ + if (dfp == NULL) + return __this_address; + if (dfp->length != newdup->length) + return __this_address; + if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr) + return __this_address; + return NULL; +} + /* * Take a byte range out of an existing unused space and make it un-free. */ -void +int xfs_dir2_data_use_free( struct xfs_da_args *args, struct xfs_buf *bp, @@ -929,23 +988,19 @@ xfs_dir2_data_use_free( { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ + struct xfs_dir2_data_free *bf; + xfs_failaddr_t fa; int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ int needscan; /* need to regen bestfree */ - xfs_dir2_data_unused_t *newdup; /* new unused entry */ - xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ int oldlen; /* old unused entry's length */ - struct xfs_dir2_data_free *bf; hdr = bp->b_addr; - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); - ASSERT(offset >= (char *)dup - (char *)hdr); - ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); - ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); + fa = xfs_dir2_data_check_free(hdr, dup, offset, len); + if (fa) + goto corrupt; /* * Look up the entry in the bestfree table. */ @@ -990,9 +1045,9 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); + if (fa) + goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -1018,9 +1073,9 @@ xfs_dir2_data_use_free( xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); - ASSERT(dfp != NULL); - ASSERT(dfp->length == newdup->length); - ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); + fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); + if (fa) + goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better @@ -1066,4 +1121,27 @@ xfs_dir2_data_use_free( } } *needscanp = needscan; + return 0; +corrupt: + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, + hdr, __FILE__, __LINE__, fa); + return -EFSCORRUPTED; +} + +/* Find the end of the entry data in a data/block format dir block. */ +void * +xfs_dir3_data_endp( + struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr) +{ + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + return (char *)hdr + geo->blksize; + default: + return NULL; + } } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 27297a689d9c..50fc9c0c5e2b 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -STATIC bool +static xfs_failaddr_t xfs_dir3_leaf1_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -69,17 +63,32 @@ xfs_dir3_leaf1_check( if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leaf1_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -bool +xfs_failaddr_t xfs_dir3_leaf_check_int( struct xfs_mount *mp, struct xfs_inode *dp, @@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int( * We can deduce a value for that from di_size. */ if (hdr->count > ops->leaf_max_ents(geo)) - return false; + return __this_address; /* Leaves and bests don't overlap in leaf format. */ if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || hdr->magic == XFS_DIR3_LEAF1_MAGIC) && (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) - return false; + return __this_address; /* Check hash value order, count stale entries. */ for (i = stale = 0; i < hdr->count; i++) { if (i + 1 < hdr->count) { if (be32_to_cpu(ents[i].hashval) > be32_to_cpu(ents[i + 1].hashval)) - return false; + return __this_address; } if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (hdr->stale != stale) - return false; - return true; + return __this_address; + return NULL; } /* @@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int( * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due * to incorrect magic numbers. */ -static bool +static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp, uint16_t magic) @@ -160,16 +169,16 @@ xfs_dir3_leaf_verify( : XFS_DIR3_LEAFN_MAGIC; if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) - return false; + return __this_address; if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) - return false; + return __this_address; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) - return false; + return __this_address; } return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); @@ -181,15 +190,16 @@ __read_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -198,12 +208,13 @@ __write_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -216,6 +227,13 @@ __write_verify( xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } +static xfs_failaddr_t +xfs_dir3_leaf1_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + static void xfs_dir3_leaf1_read_verify( struct xfs_buf *bp) @@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify( __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); } +static xfs_failaddr_t +xfs_dir3_leafn_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + static void xfs_dir3_leafn_read_verify( struct xfs_buf *bp) @@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { .name = "xfs_dir3_leaf1", .verify_read = xfs_dir3_leaf1_read_verify, .verify_write = xfs_dir3_leaf1_write_verify, + .verify_struct = xfs_dir3_leaf1_verify, }; const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { .name = "xfs_dir3_leafn", .verify_read = xfs_dir3_leafn_read_verify, .verify_write = xfs_dir3_leafn_write_verify, + .verify_struct = xfs_dir3_leafn_verify, }; int @@ -850,9 +877,13 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); + error = xfs_dir2_data_use_free(args, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), + length, &needlog, &needscan); + if (error) { + xfs_trans_brelse(tp, lbp); + return error; + } /* * Initialize our new entry (at last). */ @@ -1388,7 +1419,8 @@ xfs_dir2_leaf_removename( oldbest = be16_to_cpu(bf[0].length); ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); - ASSERT(be16_to_cpu(bestsp[db]) == oldbest); + if (be16_to_cpu(bestsp[db]) != oldbest) + return -EFSCORRUPTED; /* * Mark the former data entry unused. */ diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 682e2bf370c7..9df096cc3c37 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -static bool +static xfs_failaddr_t xfs_dir3_leafn_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -72,17 +66,32 @@ xfs_dir3_leafn_check( if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leafn_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -static bool +static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { @@ -93,21 +102,21 @@ xfs_dir3_free_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) - return false; + return __this_address; } /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ - return true; + return NULL; } static void @@ -115,15 +124,16 @@ xfs_dir3_free_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_free_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -131,12 +141,13 @@ xfs_dir3_free_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_free_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_free_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .name = "xfs_dir3_free", .verify_read = xfs_dir3_free_read_verify, .verify_write = xfs_dir3_free_write_verify, + .verify_struct = xfs_dir3_free_verify, }; /* Everything ok in the free block header? */ -static bool +static xfs_failaddr_t xfs_dir3_free_header_check( struct xfs_inode *dp, xfs_dablk_t fbno, @@ -174,22 +186,22 @@ xfs_dir3_free_header_check( struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (be32_to_cpu(hdr3->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) - return false; + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (be32_to_cpu(hdr->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) - return false; + return __this_address; } - return true; + return NULL; } static int @@ -200,6 +212,7 @@ __xfs_dir3_free_read( xfs_daddr_t mappedbno, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, @@ -208,9 +221,9 @@ __xfs_dir3_free_read( return err; /* Check things that we can't do in the verifier. */ - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); - xfs_verifier_error(*bpp); + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); + if (fa) { + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; } @@ -374,8 +387,9 @@ xfs_dir2_leaf_to_node( dp->d_ops->free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); - ASSERT(be32_to_cpu(ltp->bestcount) <= - (uint)dp->i_d.di_size / args->geo->blksize); + if (be32_to_cpu(ltp->bestcount) > + (uint)dp->i_d.di_size / args->geo->blksize) + return -EFSCORRUPTED; /* * Copy freespace entries from the leaf block to the new block. @@ -1715,6 +1729,7 @@ xfs_dir2_node_addname_int( __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; struct xfs_dir2_data_free *bf; + xfs_dir2_data_aoff_t aoff; dp = args->dp; mp = dp->i_mount; @@ -1906,7 +1921,7 @@ xfs_dir2_node_addname_int( (unsigned long long)ifbno, lastfbno); if (fblk) { xfs_alert(mp, - " fblk 0x%p blkno %llu index %d magic 0x%x", + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, @@ -2009,9 +2024,13 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(args, dbp, dup, - (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, - &needlog, &needscan); + aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, + &needlog, &needscan); + if (error) { + xfs_trans_brelse(tp, dbp); + return error; + } /* * Fill in the new entry and log it. */ diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 4badd26c47e6..753aeeeffc18 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, /* xfs_dir2_data.c */ #ifdef DEBUG -#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); +extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir3_data_check(dp,bp) #endif -extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, + struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, @@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); +extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, @@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_inode *ip); +extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index be8b9755f66a..0c75a7f00883 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -156,7 +156,6 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data pointer */ @@ -192,9 +191,8 @@ xfs_dir2_block_to_sf( /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(args->geo, hdr); sfep = xfs_dir2_sf_firstentry(sfp); /* * Loop over the active and unused entries. @@ -630,7 +628,7 @@ xfs_dir2_sf_check( #endif /* DEBUG */ /* Verify the consistency of an inline directory. */ -int +xfs_failaddr_t xfs_dir2_sf_verify( struct xfs_inode *ip) { @@ -665,7 +663,7 @@ xfs_dir2_sf_verify( */ if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; + return __this_address; endp = (char *)sfp + size; @@ -674,7 +672,7 @@ xfs_dir2_sf_verify( i8count = ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; offset = dops->data_first_offset; /* Check all reported entries */ @@ -686,11 +684,11 @@ xfs_dir2_sf_verify( * within the data buffer. */ if (((char *)sfep + sizeof(*sfep)) >= endp) - return -EFSCORRUPTED; + return __this_address; /* Don't allow names with known bad length. */ if (sfep->namelen == 0) - return -EFSCORRUPTED; + return __this_address; /* * Check that the variable-length part of the structure is @@ -699,23 +697,23 @@ xfs_dir2_sf_verify( */ next_sfep = dops->sf_nextentry(sfp, sfep); if (endp < (char *)next_sfep) - return -EFSCORRUPTED; + return __this_address; /* Check that the offsets always increase. */ if (xfs_dir2_sf_get_offset(sfep) < offset) - return -EFSCORRUPTED; + return __this_address; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); if (filetype >= XFS_DIR3_FT_MAX) - return -EFSCORRUPTED; + return __this_address; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); @@ -723,16 +721,16 @@ xfs_dir2_sf_verify( sfep = next_sfep; } if (i8count != sfp->i8count) - return -EFSCORRUPTED; + return __this_address; if ((void *)sfep != (void *)endp) - return -EFSCORRUPTED; + return __this_address; /* Make sure this whole thing ought to be in local format. */ if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) - return -EFSCORRUPTED; + return __this_address; - return 0; + return NULL; } /* diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 747085b4ef44..8b7a6c3cb599 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk( /* * Do some primitive error checking on ondisk dquot data structures. */ -int -xfs_dqcheck( +xfs_failaddr_t +xfs_dquot_verify( struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ - uint flags, - const char *str) + uint flags) { - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - /* * We can encounter an uninitialized dquot buffer for 2 reasons: * 1. If we crash while deleting the quotainode(s), and those blks got @@ -69,87 +65,57 @@ xfs_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) + return __this_address; + if (ddq->d_version != XFS_DQUOT_VERSION) + return __this_address; if (ddq->d_flags != XFS_DQ_USER && ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } + ddq->d_flags != XFS_DQ_GROUP) + return __this_address; - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } + if (id != -1 && id != be32_to_cpu(ddq->d_id)) + return __this_address; - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } + if (!ddq->d_id) + return NULL; + + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && + !ddq->d_btimer) + return __this_address; + + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && + !ddq->d_itimer) + return __this_address; - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && + !ddq->d_rtbtimer) + return __this_address; + + return NULL; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dquot_repair( + struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, + xfs_dqid_t id, + uint type) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); /* * Typically, a repair is only requested by quotacheck. */ ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); memset(d, 0, sizeof(xfs_dqblk_t)); d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -163,7 +129,7 @@ xfs_dqcheck( XFS_DQUOT_CRC_OFF); } - return errs; + return 0; } STATIC bool @@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc( return true; } -STATIC bool +STATIC xfs_failaddr_t xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp, - int warn) + struct xfs_buf *bp) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_failaddr_t fa; xfs_dqid_t id = 0; int ndquots; int i; @@ -228,33 +194,43 @@ xfs_dquot_buf_verify( */ for (i = 0; i < ndquots; i++) { struct xfs_disk_dquot *ddq; - int error; ddq = &d[i].dd_diskdq; if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); - if (error) - return false; + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); + if (fa) + return fa; } - return true; + + return NULL; +} + +static xfs_failaddr_t +xfs_dquot_buf_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + return xfs_dquot_buf_verify(mp, bp); } static void xfs_dquot_buf_read_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + } } /* @@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp) || - !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_dquot_buf_verify(mp, bp) != NULL) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; } @@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify( */ static void xfs_dquot_buf_write_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_dquot_buf_ops = { .name = "xfs_dquot", .verify_read = xfs_dquot_buf_read_verify, .verify_write = xfs_dquot_buf_write_verify, + .verify_struct = xfs_dquot_buf_verify_struct, }; const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h new file mode 100644 index 000000000000..bc1789d95152 --- /dev/null +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (C) 2017 Oracle. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_ERRORTAG_H_ +#define __XFS_ERRORTAG_H_ + +/* + * error injection tags - the labels can be anything you want + * but each tag should have its own unique number + */ + +#define XFS_ERRTAG_NOERROR 0 +#define XFS_ERRTAG_IFLUSH_1 1 +#define XFS_ERRTAG_IFLUSH_2 2 +#define XFS_ERRTAG_IFLUSH_3 3 +#define XFS_ERRTAG_IFLUSH_4 4 +#define XFS_ERRTAG_IFLUSH_5 5 +#define XFS_ERRTAG_IFLUSH_6 6 +#define XFS_ERRTAG_DA_READ_BUF 7 +#define XFS_ERRTAG_BTREE_CHECK_LBLOCK 8 +#define XFS_ERRTAG_BTREE_CHECK_SBLOCK 9 +#define XFS_ERRTAG_ALLOC_READ_AGF 10 +#define XFS_ERRTAG_IALLOC_READ_AGI 11 +#define XFS_ERRTAG_ITOBP_INOTOBP 12 +#define XFS_ERRTAG_IUNLINK 13 +#define XFS_ERRTAG_IUNLINK_REMOVE 14 +#define XFS_ERRTAG_DIR_INO_VALIDATE 15 +#define XFS_ERRTAG_BULKSTAT_READ_CHUNK 16 +#define XFS_ERRTAG_IODONE_IOERR 17 +#define XFS_ERRTAG_STRATREAD_IOERR 18 +#define XFS_ERRTAG_STRATCMPL_IOERR 19 +#define XFS_ERRTAG_DIOWRITE_IOERR 20 +#define XFS_ERRTAG_BMAPIFORMAT 21 +#define XFS_ERRTAG_FREE_EXTENT 22 +#define XFS_ERRTAG_RMAP_FINISH_ONE 23 +#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE 24 +#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 +#define XFS_ERRTAG_BMAP_FINISH_ONE 26 +#define XFS_ERRTAG_AG_RESV_CRITICAL 27 +/* + * DEBUG mode instrumentation to test and/or trigger delayed allocation + * block killing in the event of failed writes. When enabled, all + * buffered writes are silenty dropped and handled as if they failed. + * All delalloc blocks in the range of the write (including pre-existing + * delalloc blocks!) are tossed as part of the write failure error + * handling sequence. + */ +#define XFS_ERRTAG_DROP_WRITES 28 +#define XFS_ERRTAG_LOG_BAD_CRC 29 +#define XFS_ERRTAG_LOG_ITEM_PIN 30 +#define XFS_ERRTAG_BUF_LRU_REF 31 +#define XFS_ERRTAG_MAX 32 + +/* + * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. + */ +#define XFS_RANDOM_DEFAULT 100 +#define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT +#define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT +#define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4) +#define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT +#define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT +#define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT +#define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT +#define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT +#define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT +#define XFS_RANDOM_FREE_EXTENT 1 +#define XFS_RANDOM_RMAP_FINISH_ONE 1 +#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1 +#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 +#define XFS_RANDOM_BMAP_FINISH_ONE 1 +#define XFS_RANDOM_AG_RESV_CRITICAL 4 +#define XFS_RANDOM_DROP_WRITES 1 +#define XFS_RANDOM_LOG_BAD_CRC 1 +#define XFS_RANDOM_LOG_ITEM_PIN 1 +#define XFS_RANDOM_BUF_LRU_REF 2 + +#endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 23229f0c5b15..42956d8d95ed 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -315,6 +315,11 @@ static inline bool xfs_sb_good_version(struct xfs_sb *sbp) return false; } +static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp) +{ + return sbp->sb_rblocks > 0; +} + /* * Detect a mismatched features2 field. Older kernels read/wrote * this into the wrong slot, so to be safe we keep them in sync. @@ -500,12 +505,12 @@ xfs_sb_has_incompat_log_feature( /* * V5 superblock specific feature checks */ -static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) +static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } -static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) +static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -518,7 +523,7 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); } -static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); @@ -798,24 +803,13 @@ typedef struct xfs_agi { &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ (__be32 *)(bp)->b_addr) -/* - * Size of the AGFL. For CRC-enabled filesystes we steal a couple of - * slots in the beginning of the block for a proper header with the - * location information and CRC. - */ -#define XFS_AGFL_SIZE(mp) \ - (((mp)->m_sb.sb_sectsize - \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ - sizeof(struct xfs_agfl) : 0)) / \ - sizeof(xfs_agblock_t)) - typedef struct xfs_agfl { __be32 agfl_magicnum; __be32 agfl_seqno; uuid_t agfl_uuid; __be64 agfl_lsn; __be32 agfl_crc; - __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ + __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */ } __attribute__((packed)) xfs_agfl_t; #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) @@ -941,7 +935,7 @@ typedef enum xfs_dinode_fmt { XFS_DINODE_FMT_LOCAL, /* bulk data */ XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ - XFS_DINODE_FMT_UUID /* uuid_t */ + XFS_DINODE_FMT_UUID /* added long ago, but never used */ } xfs_dinode_fmt_t; /* @@ -1142,7 +1136,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * Dquot and dquot block format definitions */ #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ -#define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ +#define XFS_DQUOT_VERSION (uint8_t)0x01 /* latest version number */ /* * This is the main portion of the on-disk representation of quota @@ -1548,10 +1542,6 @@ typedef struct xfs_bmbt_rec { typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; -typedef struct xfs_bmbt_rec_host { - uint64_t l0, l1; -} xfs_bmbt_rec_host_t; - /* * Values and macros for delayed-allocation startblock fields. */ @@ -1577,24 +1567,6 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x) } /* - * Possible extent states. - */ -typedef enum { - XFS_EXT_NORM, XFS_EXT_UNWRITTEN, -} xfs_exntst_t; - -/* - * Incore version of above. - */ -typedef struct xfs_bmbt_irec -{ - xfs_fileoff_t br_startoff; /* starting file offset */ - xfs_fsblock_t br_startblock; /* starting block number */ - xfs_filblks_t br_blockcount; /* number of blocks */ - xfs_exntst_t br_state; /* extent state */ -} xfs_bmbt_irec_t; - -/* * Key structure for non-leaf levels of the tree. */ typedef struct xfs_bmbt_key { diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 8c61f21535d4..faf1a4edd618 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) +/* + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than + * 16MB or larger than 1TB. + */ +#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ +#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ + /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) @@ -468,6 +475,82 @@ typedef struct xfs_swapext #define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ +/* metadata scrubbing */ +struct xfs_scrub_metadata { + __u32 sm_type; /* What to check? */ + __u32 sm_flags; /* flags; see below. */ + __u64 sm_ino; /* inode number. */ + __u32 sm_gen; /* inode generation. */ + __u32 sm_agno; /* ag number. */ + __u64 sm_reserved[5]; /* pad to 64 bytes */ +}; + +/* + * Metadata types and flags for scrub operation. + */ + +/* Scrub subcommands. */ +#define XFS_SCRUB_TYPE_PROBE 0 /* presence test ioctl */ +#define XFS_SCRUB_TYPE_SB 1 /* superblock */ +#define XFS_SCRUB_TYPE_AGF 2 /* AG free header */ +#define XFS_SCRUB_TYPE_AGFL 3 /* AG free list */ +#define XFS_SCRUB_TYPE_AGI 4 /* AG inode header */ +#define XFS_SCRUB_TYPE_BNOBT 5 /* freesp by block btree */ +#define XFS_SCRUB_TYPE_CNTBT 6 /* freesp by length btree */ +#define XFS_SCRUB_TYPE_INOBT 7 /* inode btree */ +#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */ +#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */ +#define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */ +#define XFS_SCRUB_TYPE_INODE 11 /* inode record */ +#define XFS_SCRUB_TYPE_BMBTD 12 /* data fork block mapping */ +#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */ +#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */ +#define XFS_SCRUB_TYPE_DIR 15 /* directory */ +#define XFS_SCRUB_TYPE_XATTR 16 /* extended attribute */ +#define XFS_SCRUB_TYPE_SYMLINK 17 /* symbolic link */ +#define XFS_SCRUB_TYPE_PARENT 18 /* parent pointers */ +#define XFS_SCRUB_TYPE_RTBITMAP 19 /* realtime bitmap */ +#define XFS_SCRUB_TYPE_RTSUM 20 /* realtime summary */ +#define XFS_SCRUB_TYPE_UQUOTA 21 /* user quotas */ +#define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */ +#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */ + +/* Number of scrub subcommands. */ +#define XFS_SCRUB_TYPE_NR 24 + +/* i: Repair this metadata. */ +#define XFS_SCRUB_IFLAG_REPAIR (1 << 0) + +/* o: Metadata object needs repair. */ +#define XFS_SCRUB_OFLAG_CORRUPT (1 << 1) + +/* + * o: Metadata object could be optimized. It's not corrupt, but + * we could improve on it somehow. + */ +#define XFS_SCRUB_OFLAG_PREEN (1 << 2) + +/* o: Cross-referencing failed. */ +#define XFS_SCRUB_OFLAG_XFAIL (1 << 3) + +/* o: Metadata object disagrees with cross-referenced metadata. */ +#define XFS_SCRUB_OFLAG_XCORRUPT (1 << 4) + +/* o: Scan was not complete. */ +#define XFS_SCRUB_OFLAG_INCOMPLETE (1 << 5) + +/* o: Metadata object looked funny but isn't corrupt. */ +#define XFS_SCRUB_OFLAG_WARNING (1 << 6) + +#define XFS_SCRUB_FLAGS_IN (XFS_SCRUB_IFLAG_REPAIR) +#define XFS_SCRUB_FLAGS_OUT (XFS_SCRUB_OFLAG_CORRUPT | \ + XFS_SCRUB_OFLAG_PREEN | \ + XFS_SCRUB_OFLAG_XFAIL | \ + XFS_SCRUB_OFLAG_XCORRUPT | \ + XFS_SCRUB_OFLAG_INCOMPLETE | \ + XFS_SCRUB_OFLAG_WARNING) +#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT) + /* * ioctl limits */ @@ -511,6 +594,7 @@ typedef struct xfs_swapext #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) /* XFS_IOC_GETFSMAP ------ hoisted 59 */ +#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 988bb3f31446..0e2cf5f0be1f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -31,6 +31,7 @@ #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_cksum.h" @@ -919,8 +920,7 @@ STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ - umode_t mode, /* bits set to indicate file type */ - int okalloc) /* ok to allocate more space */ + umode_t mode) /* bits set to indicate file type */ { xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ @@ -977,9 +977,6 @@ xfs_ialloc_ag_select( return agno; } - if (!okalloc) - goto nextag; - if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, tp, agno, flags); if (error) @@ -1679,7 +1676,6 @@ xfs_dialloc( struct xfs_trans *tp, xfs_ino_t parent, umode_t mode, - int okalloc, struct xfs_buf **IO_agbp, xfs_ino_t *inop) { @@ -1691,6 +1687,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + int okalloc = 1; if (*IO_agbp) { /* @@ -1706,7 +1703,7 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + start_agno = xfs_ialloc_ag_select(tp, parent, mode); if (start_agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; @@ -1962,7 +1959,7 @@ xfs_difree_inobt( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { - xic->deleted = 1; + xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); @@ -1989,7 +1986,7 @@ xfs_difree_inobt( xfs_difree_inode_chunk(mp, agno, &rec, dfops); } else { - xic->deleted = 0; + xic->deleted = false; error = xfs_inobt_update(cur, &rec); if (error) { @@ -2494,7 +2491,7 @@ xfs_check_agi_unlinked( #define xfs_check_agi_unlinked(agi) #endif -static bool +static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { @@ -2503,28 +2500,28 @@ xfs_agi_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) - return false; + return __this_address; } /* * Validate the magic number of the agi block. */ if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) - return false; + return __this_address; if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) - return false; + return __this_address; if (be32_to_cpu(agi->agi_level) < 1 || be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasfinobt(&mp->m_sb) && (be32_to_cpu(agi->agi_free_level) < 1 || be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2533,10 +2530,10 @@ xfs_agi_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; xfs_check_agi_unlinked(agi); - return true; + return NULL; } static void @@ -2544,28 +2541,29 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agi_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agi_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agi_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2581,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = { .name = "xfs_agi", .verify_read = xfs_agi_read_verify, .verify_write = xfs_agi_write_verify, + .verify_struct = xfs_agi_verify, }; /* @@ -2664,3 +2663,192 @@ xfs_ialloc_pagi_init( xfs_trans_brelse(tp, bp); return 0; } + +/* Calculate the first and last possible inode number in an AG. */ +void +xfs_ialloc_agino_range( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t *first, + xfs_agino_t *last) +{ + xfs_agblock_t bno; + xfs_agblock_t eoag; + + eoag = xfs_ag_block_count(mp, agno); + + /* + * Calculate the first inode, which will be in the first + * cluster-aligned block after the AGFL. + */ + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, + xfs_ialloc_cluster_alignment(mp)); + *first = XFS_OFFBNO_TO_AGINO(mp, bno, 0); + + /* + * Calculate the last inode, which will be at the end of the + * last (aligned) cluster that can be allocated in the AG. + */ + bno = round_down(eoag, xfs_ialloc_cluster_alignment(mp)); + *last = XFS_OFFBNO_TO_AGINO(mp, bno, 0) - 1; +} + +/* + * Verify that an AG inode number pointer neither points outside the AG + * nor points at static metadata. + */ +bool +xfs_verify_agino( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t agino) +{ + xfs_agino_t first; + xfs_agino_t last; + + xfs_ialloc_agino_range(mp, agno, &first, &last); + return agino >= first && agino <= last; +} + +/* + * Verify that an FS inode number pointer neither points outside the + * filesystem nor points at static AG metadata. + */ +bool +xfs_verify_ino( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); + + if (agno >= mp->m_sb.sb_agcount) + return false; + if (XFS_AGINO_TO_INO(mp, agno, agino) != ino) + return false; + return xfs_verify_agino(mp, agno, agino); +} + +/* Is this an internal inode number? */ +bool +xfs_internal_inum( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || + (xfs_sb_version_hasquota(&mp->m_sb) && + xfs_is_quota_inode(&mp->m_sb, ino)); +} + +/* + * Verify that a directory entry's inode number doesn't point at an internal + * inode, empty space, or static AG metadata. + */ +bool +xfs_verify_dir_ino( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + if (xfs_internal_inum(mp, ino)) + return false; + return xfs_verify_ino(mp, ino); +} + +/* Is there an inode record covering a given range of inode numbers? */ +int +xfs_ialloc_has_inode_record( + struct xfs_btree_cur *cur, + xfs_agino_t low, + xfs_agino_t high, + bool *exists) +{ + struct xfs_inobt_rec_incore irec; + xfs_agino_t agino; + uint16_t holemask; + int has_record; + int i; + int error; + + *exists = false; + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); + while (error == 0 && has_record) { + error = xfs_inobt_get_rec(cur, &irec, &has_record); + if (error || irec.ir_startino > high) + break; + + agino = irec.ir_startino; + holemask = irec.ir_holemask; + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { + if (holemask & 1) + continue; + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && + agino <= high) { + *exists = true; + return 0; + } + } + + error = xfs_btree_increment(cur, 0, &has_record); + } + return error; +} + +/* Is there an inode record covering a given extent? */ +int +xfs_ialloc_has_inodes_at_extent( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + xfs_agino_t low; + xfs_agino_t high; + + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; + + return xfs_ialloc_has_inode_record(cur, low, high, exists); +} + +struct xfs_ialloc_count_inodes { + xfs_agino_t count; + xfs_agino_t freecount; +}; + +/* Record inode counts across all inobt records. */ +STATIC int +xfs_ialloc_count_inodes_rec( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_inobt_rec_incore irec; + struct xfs_ialloc_count_inodes *ci = priv; + + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); + ci->count += irec.ir_count; + ci->freecount += irec.ir_freecount; + + return 0; +} + +/* Count allocated and free inodes under an inobt. */ +int +xfs_ialloc_count_inodes( + struct xfs_btree_cur *cur, + xfs_agino_t *count, + xfs_agino_t *freecount) +{ + struct xfs_ialloc_count_inodes ci = {0}; + int error; + + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); + if (error) + return error; + + *count = ci.count; + *freecount = ci.freecount; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index b32cfb5aeb5b..c5402bb4ce0c 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -81,7 +81,6 @@ xfs_dialloc( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g. inode header */ xfs_ino_t *inop); /* inode number allocated */ @@ -171,7 +170,20 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); +int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); +int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, + xfs_agino_t high, bool *exists); +int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, + xfs_agino_t *freecount); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); +void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agino_t *first, xfs_agino_t *last); +bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agino_t agino); +bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); +bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); +bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 317caba9faa6..a2dd7f4a2719 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -93,8 +93,6 @@ __xfs_inobt_alloc_block( int error; /* error return value */ xfs_agblock_t sbno = be32_to_cpu(start->s); - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; @@ -107,17 +105,14 @@ __xfs_inobt_alloc_block( args.resv = resv; error = xfs_alloc_vextent(&args); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } + if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } ASSERT(args.len == 1); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); *stat = 1; @@ -141,21 +136,42 @@ xfs_finobt_alloc_block( union xfs_btree_ptr *new, int *stat) { + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_alloc_block(cur, start, new, stat); return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_METADATA); } STATIC int -xfs_inobt_free_block( +__xfs_inobt_free_block( struct xfs_btree_cur *cur, - struct xfs_buf *bp) + struct xfs_buf *bp, + enum xfs_ag_resv_type resv) { struct xfs_owner_info oinfo; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, - &oinfo, XFS_AG_RESV_NONE); + &oinfo, resv); +} + +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_free_block(cur, bp); + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); } STATIC int @@ -250,12 +266,13 @@ xfs_inobt_diff_two_keys( be32_to_cpu(k2->inobt.ir_startino); } -static int +static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; /* @@ -271,20 +288,21 @@ xfs_inobt_verify( switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): case cpu_to_be32(XFS_FIBT_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): case cpu_to_be32(XFS_FIBT_MAGIC): break; default: - return 0; + return NULL; } /* level verification */ level = be16_to_cpu(block->bb_level); if (level >= mp->m_in_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); } @@ -293,25 +311,30 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_inobt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_inobt_write_verify( struct xfs_buf *bp) { - if (!xfs_inobt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_inobt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -322,6 +345,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = { .name = "xfs_inobt", .verify_read = xfs_inobt_read_verify, .verify_write = xfs_inobt_write_verify, + .verify_struct = xfs_inobt_verify, }; STATIC int @@ -372,7 +396,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, .alloc_block = xfs_finobt_alloc_block, - .free_block = xfs_inobt_free_block, + .free_block = xfs_finobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c new file mode 100644 index 000000000000..b0f31791c7e6 --- /dev/null +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -0,0 +1,1043 @@ +/* + * Copyright (c) 2017 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/cache.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include "xfs.h" +#include "xfs_format.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_trace.h" + +/* + * In-core extent record layout: + * + * +-------+----------------------------+ + * | 00:53 | all 54 bits of startoff | + * | 54:63 | low 10 bits of startblock | + * +-------+----------------------------+ + * | 00:20 | all 21 bits of length | + * | 21 | unwritten extent bit | + * | 22:63 | high 42 bits of startblock | + * +-------+----------------------------+ + */ +#define XFS_IEXT_STARTOFF_MASK xfs_mask64lo(BMBT_STARTOFF_BITLEN) +#define XFS_IEXT_LENGTH_MASK xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN) +#define XFS_IEXT_STARTBLOCK_MASK xfs_mask64lo(BMBT_STARTBLOCK_BITLEN) + +struct xfs_iext_rec { + uint64_t lo; + uint64_t hi; +}; + +/* + * Given that the length can't be a zero, only an empty hi value indicates an + * unused record. + */ +static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec) +{ + return rec->hi == 0; +} + +static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec) +{ + rec->lo = 0; + rec->hi = 0; +} + +static void +xfs_iext_set( + struct xfs_iext_rec *rec, + struct xfs_bmbt_irec *irec) +{ + ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0); + ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0); + ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0); + + rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK; + rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK; + + rec->lo |= (irec->br_startblock << 54); + rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10)); + + if (irec->br_state == XFS_EXT_UNWRITTEN) + rec->hi |= (1 << 21); +} + +static void +xfs_iext_get( + struct xfs_bmbt_irec *irec, + struct xfs_iext_rec *rec) +{ + irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK; + irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK; + + irec->br_startblock = rec->lo >> 54; + irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10); + + if (rec->hi & (1 << 21)) + irec->br_state = XFS_EXT_UNWRITTEN; + else + irec->br_state = XFS_EXT_NORM; +} + +enum { + NODE_SIZE = 256, + KEYS_PER_NODE = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)), + RECS_PER_LEAF = (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) / + sizeof(struct xfs_iext_rec), +}; + +/* + * In-core extent btree block layout: + * + * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks. + * + * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each + * contain the startoffset, blockcount, startblock and unwritten extent flag. + * See above for the exact format, followed by pointers to the previous and next + * leaf blocks (if there are any). + * + * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed + * by an equal number of pointers to the btree blocks at the next lower level. + * + * +-------+-------+-------+-------+-------+----------+----------+ + * Leaf: | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr | + * +-------+-------+-------+-------+-------+----------+----------+ + * + * +-------+-------+-------+-------+-------+-------+------+-------+ + * Inner: | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N | + * +-------+-------+-------+-------+-------+-------+------+-------+ + */ +struct xfs_iext_node { + uint64_t keys[KEYS_PER_NODE]; +#define XFS_IEXT_KEY_INVALID (1ULL << 63) + void *ptrs[KEYS_PER_NODE]; +}; + +struct xfs_iext_leaf { + struct xfs_iext_rec recs[RECS_PER_LEAF]; + struct xfs_iext_leaf *prev; + struct xfs_iext_leaf *next; +}; + +inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp) +{ + return ifp->if_bytes / sizeof(struct xfs_iext_rec); +} + +static inline int xfs_iext_max_recs(struct xfs_ifork *ifp) +{ + if (ifp->if_height == 1) + return xfs_iext_count(ifp); + return RECS_PER_LEAF; +} + +static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur) +{ + return &cur->leaf->recs[cur->pos]; +} + +static inline bool xfs_iext_valid(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + if (!cur->leaf) + return false; + if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp)) + return false; + if (xfs_iext_rec_is_empty(cur_rec(cur))) + return false; + return true; +} + +static void * +xfs_iext_find_first_leaf( + struct xfs_ifork *ifp) +{ + struct xfs_iext_node *node = ifp->if_u1.if_root; + int height; + + if (!ifp->if_height) + return NULL; + + for (height = ifp->if_height; height > 1; height--) { + node = node->ptrs[0]; + ASSERT(node); + } + + return node; +} + +static void * +xfs_iext_find_last_leaf( + struct xfs_ifork *ifp) +{ + struct xfs_iext_node *node = ifp->if_u1.if_root; + int height, i; + + if (!ifp->if_height) + return NULL; + + for (height = ifp->if_height; height > 1; height--) { + for (i = 1; i < KEYS_PER_NODE; i++) + if (!node->ptrs[i]) + break; + node = node->ptrs[i - 1]; + ASSERT(node); + } + + return node; +} + +void +xfs_iext_first( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + cur->pos = 0; + cur->leaf = xfs_iext_find_first_leaf(ifp); +} + +void +xfs_iext_last( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + int i; + + cur->leaf = xfs_iext_find_last_leaf(ifp); + if (!cur->leaf) { + cur->pos = 0; + return; + } + + for (i = 1; i < xfs_iext_max_recs(ifp); i++) { + if (xfs_iext_rec_is_empty(&cur->leaf->recs[i])) + break; + } + cur->pos = i - 1; +} + +void +xfs_iext_next( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + if (!cur->leaf) { + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF); + xfs_iext_first(ifp, cur); + return; + } + + ASSERT(cur->pos >= 0); + ASSERT(cur->pos < xfs_iext_max_recs(ifp)); + + cur->pos++; + if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) && + cur->leaf->next) { + cur->leaf = cur->leaf->next; + cur->pos = 0; + } +} + +void +xfs_iext_prev( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + if (!cur->leaf) { + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF); + xfs_iext_last(ifp, cur); + return; + } + + ASSERT(cur->pos >= 0); + ASSERT(cur->pos <= RECS_PER_LEAF); + +recurse: + do { + cur->pos--; + if (xfs_iext_valid(ifp, cur)) + return; + } while (cur->pos > 0); + + if (ifp->if_height > 1 && cur->leaf->prev) { + cur->leaf = cur->leaf->prev; + cur->pos = RECS_PER_LEAF; + goto recurse; + } +} + +static inline int +xfs_iext_key_cmp( + struct xfs_iext_node *node, + int n, + xfs_fileoff_t offset) +{ + if (node->keys[n] > offset) + return 1; + if (node->keys[n] < offset) + return -1; + return 0; +} + +static inline int +xfs_iext_rec_cmp( + struct xfs_iext_rec *rec, + xfs_fileoff_t offset) +{ + uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK; + uint32_t rec_len = rec->hi & XFS_IEXT_LENGTH_MASK; + + if (rec_offset > offset) + return 1; + if (rec_offset + rec_len <= offset) + return -1; + return 0; +} + +static void * +xfs_iext_find_level( + struct xfs_ifork *ifp, + xfs_fileoff_t offset, + int level) +{ + struct xfs_iext_node *node = ifp->if_u1.if_root; + int height, i; + + if (!ifp->if_height) + return NULL; + + for (height = ifp->if_height; height > level; height--) { + for (i = 1; i < KEYS_PER_NODE; i++) + if (xfs_iext_key_cmp(node, i, offset) > 0) + break; + + node = node->ptrs[i - 1]; + if (!node) + break; + } + + return node; +} + +static int +xfs_iext_node_pos( + struct xfs_iext_node *node, + xfs_fileoff_t offset) +{ + int i; + + for (i = 1; i < KEYS_PER_NODE; i++) { + if (xfs_iext_key_cmp(node, i, offset) > 0) + break; + } + + return i - 1; +} + +static int +xfs_iext_node_insert_pos( + struct xfs_iext_node *node, + xfs_fileoff_t offset) +{ + int i; + + for (i = 0; i < KEYS_PER_NODE; i++) { + if (xfs_iext_key_cmp(node, i, offset) > 0) + return i; + } + + return KEYS_PER_NODE; +} + +static int +xfs_iext_node_nr_entries( + struct xfs_iext_node *node, + int start) +{ + int i; + + for (i = start; i < KEYS_PER_NODE; i++) { + if (node->keys[i] == XFS_IEXT_KEY_INVALID) + break; + } + + return i; +} + +static int +xfs_iext_leaf_nr_entries( + struct xfs_ifork *ifp, + struct xfs_iext_leaf *leaf, + int start) +{ + int i; + + for (i = start; i < xfs_iext_max_recs(ifp); i++) { + if (xfs_iext_rec_is_empty(&leaf->recs[i])) + break; + } + + return i; +} + +static inline uint64_t +xfs_iext_leaf_key( + struct xfs_iext_leaf *leaf, + int n) +{ + return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK; +} + +static void +xfs_iext_grow( + struct xfs_ifork *ifp) +{ + struct xfs_iext_node *node = kmem_zalloc(NODE_SIZE, KM_NOFS); + int i; + + if (ifp->if_height == 1) { + struct xfs_iext_leaf *prev = ifp->if_u1.if_root; + + node->keys[0] = xfs_iext_leaf_key(prev, 0); + node->ptrs[0] = prev; + } else { + struct xfs_iext_node *prev = ifp->if_u1.if_root; + + ASSERT(ifp->if_height > 1); + + node->keys[0] = prev->keys[0]; + node->ptrs[0] = prev; + } + + for (i = 1; i < KEYS_PER_NODE; i++) + node->keys[i] = XFS_IEXT_KEY_INVALID; + + ifp->if_u1.if_root = node; + ifp->if_height++; +} + +static void +xfs_iext_update_node( + struct xfs_ifork *ifp, + xfs_fileoff_t old_offset, + xfs_fileoff_t new_offset, + int level, + void *ptr) +{ + struct xfs_iext_node *node = ifp->if_u1.if_root; + int height, i; + + for (height = ifp->if_height; height > level; height--) { + for (i = 0; i < KEYS_PER_NODE; i++) { + if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0) + break; + if (node->keys[i] == old_offset) + node->keys[i] = new_offset; + } + node = node->ptrs[i - 1]; + ASSERT(node); + } + + ASSERT(node == ptr); +} + +static struct xfs_iext_node * +xfs_iext_split_node( + struct xfs_iext_node **nodep, + int *pos, + int *nr_entries) +{ + struct xfs_iext_node *node = *nodep; + struct xfs_iext_node *new = kmem_zalloc(NODE_SIZE, KM_NOFS); + const int nr_move = KEYS_PER_NODE / 2; + int nr_keep = nr_move + (KEYS_PER_NODE & 1); + int i = 0; + + /* for sequential append operations just spill over into the new node */ + if (*pos == KEYS_PER_NODE) { + *nodep = new; + *pos = 0; + *nr_entries = 0; + goto done; + } + + + for (i = 0; i < nr_move; i++) { + new->keys[i] = node->keys[nr_keep + i]; + new->ptrs[i] = node->ptrs[nr_keep + i]; + + node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID; + node->ptrs[nr_keep + i] = NULL; + } + + if (*pos >= nr_keep) { + *nodep = new; + *pos -= nr_keep; + *nr_entries = nr_move; + } else { + *nr_entries = nr_keep; + } +done: + for (; i < KEYS_PER_NODE; i++) + new->keys[i] = XFS_IEXT_KEY_INVALID; + return new; +} + +static void +xfs_iext_insert_node( + struct xfs_ifork *ifp, + uint64_t offset, + void *ptr, + int level) +{ + struct xfs_iext_node *node, *new; + int i, pos, nr_entries; + +again: + if (ifp->if_height < level) + xfs_iext_grow(ifp); + + new = NULL; + node = xfs_iext_find_level(ifp, offset, level); + pos = xfs_iext_node_insert_pos(node, offset); + nr_entries = xfs_iext_node_nr_entries(node, pos); + + ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0); + ASSERT(nr_entries <= KEYS_PER_NODE); + + if (nr_entries == KEYS_PER_NODE) + new = xfs_iext_split_node(&node, &pos, &nr_entries); + + /* + * Update the pointers in higher levels if the first entry changes + * in an existing node. + */ + if (node != new && pos == 0 && nr_entries > 0) + xfs_iext_update_node(ifp, node->keys[0], offset, level, node); + + for (i = nr_entries; i > pos; i--) { + node->keys[i] = node->keys[i - 1]; + node->ptrs[i] = node->ptrs[i - 1]; + } + node->keys[pos] = offset; + node->ptrs[pos] = ptr; + + if (new) { + offset = new->keys[0]; + ptr = new; + level++; + goto again; + } +} + +static struct xfs_iext_leaf * +xfs_iext_split_leaf( + struct xfs_iext_cursor *cur, + int *nr_entries) +{ + struct xfs_iext_leaf *leaf = cur->leaf; + struct xfs_iext_leaf *new = kmem_zalloc(NODE_SIZE, KM_NOFS); + const int nr_move = RECS_PER_LEAF / 2; + int nr_keep = nr_move + (RECS_PER_LEAF & 1); + int i; + + /* for sequential append operations just spill over into the new node */ + if (cur->pos == RECS_PER_LEAF) { + cur->leaf = new; + cur->pos = 0; + *nr_entries = 0; + goto done; + } + + for (i = 0; i < nr_move; i++) { + new->recs[i] = leaf->recs[nr_keep + i]; + xfs_iext_rec_clear(&leaf->recs[nr_keep + i]); + } + + if (cur->pos >= nr_keep) { + cur->leaf = new; + cur->pos -= nr_keep; + *nr_entries = nr_move; + } else { + *nr_entries = nr_keep; + } +done: + if (leaf->next) + leaf->next->prev = new; + new->next = leaf->next; + new->prev = leaf; + leaf->next = new; + return new; +} + +static void +xfs_iext_alloc_root( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + ASSERT(ifp->if_bytes == 0); + + ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS); + ifp->if_height = 1; + + /* now that we have a node step into it */ + cur->leaf = ifp->if_u1.if_root; + cur->pos = 0; +} + +static void +xfs_iext_realloc_root( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur) +{ + size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec); + void *new; + + /* account for the prev/next pointers */ + if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) + new_size = NODE_SIZE; + + new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS); + memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); + ifp->if_u1.if_root = new; + cur->leaf = new; +} + +void +xfs_iext_insert( + struct xfs_inode *ip, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *irec, + int state) +{ + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + xfs_fileoff_t offset = irec->br_startoff; + struct xfs_iext_leaf *new = NULL; + int nr_entries, i; + + if (ifp->if_height == 0) + xfs_iext_alloc_root(ifp, cur); + else if (ifp->if_height == 1) + xfs_iext_realloc_root(ifp, cur); + + nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos); + ASSERT(nr_entries <= RECS_PER_LEAF); + ASSERT(cur->pos >= nr_entries || + xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0); + + if (nr_entries == RECS_PER_LEAF) + new = xfs_iext_split_leaf(cur, &nr_entries); + + /* + * Update the pointers in higher levels if the first entry changes + * in an existing node. + */ + if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) { + xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0), + offset, 1, cur->leaf); + } + + for (i = nr_entries; i > cur->pos; i--) + cur->leaf->recs[i] = cur->leaf->recs[i - 1]; + xfs_iext_set(cur_rec(cur), irec); + ifp->if_bytes += sizeof(struct xfs_iext_rec); + + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + + if (new) + xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); +} + +static struct xfs_iext_node * +xfs_iext_rebalance_node( + struct xfs_iext_node *parent, + int *pos, + struct xfs_iext_node *node, + int nr_entries) +{ + /* + * If the neighbouring nodes are completely full, or have different + * parents, we might never be able to merge our node, and will only + * delete it once the number of entries hits zero. + */ + if (nr_entries == 0) + return node; + + if (*pos > 0) { + struct xfs_iext_node *prev = parent->ptrs[*pos - 1]; + int nr_prev = xfs_iext_node_nr_entries(prev, 0), i; + + if (nr_prev + nr_entries <= KEYS_PER_NODE) { + for (i = 0; i < nr_entries; i++) { + prev->keys[nr_prev + i] = node->keys[i]; + prev->ptrs[nr_prev + i] = node->ptrs[i]; + } + return node; + } + } + + if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) { + struct xfs_iext_node *next = parent->ptrs[*pos + 1]; + int nr_next = xfs_iext_node_nr_entries(next, 0), i; + + if (nr_entries + nr_next <= KEYS_PER_NODE) { + /* + * Merge the next node into this node so that we don't + * have to do an additional update of the keys in the + * higher levels. + */ + for (i = 0; i < nr_next; i++) { + node->keys[nr_entries + i] = next->keys[i]; + node->ptrs[nr_entries + i] = next->ptrs[i]; + } + + ++*pos; + return next; + } + } + + return NULL; +} + +static void +xfs_iext_remove_node( + struct xfs_ifork *ifp, + xfs_fileoff_t offset, + void *victim) +{ + struct xfs_iext_node *node, *parent; + int level = 2, pos, nr_entries, i; + + ASSERT(level <= ifp->if_height); + node = xfs_iext_find_level(ifp, offset, level); + pos = xfs_iext_node_pos(node, offset); +again: + ASSERT(node->ptrs[pos]); + ASSERT(node->ptrs[pos] == victim); + kmem_free(victim); + + nr_entries = xfs_iext_node_nr_entries(node, pos) - 1; + offset = node->keys[0]; + for (i = pos; i < nr_entries; i++) { + node->keys[i] = node->keys[i + 1]; + node->ptrs[i] = node->ptrs[i + 1]; + } + node->keys[nr_entries] = XFS_IEXT_KEY_INVALID; + node->ptrs[nr_entries] = NULL; + + if (pos == 0 && nr_entries > 0) { + xfs_iext_update_node(ifp, offset, node->keys[0], level, node); + offset = node->keys[0]; + } + + if (nr_entries >= KEYS_PER_NODE / 2) + return; + + if (level < ifp->if_height) { + /* + * If we aren't at the root yet try to find a neighbour node to + * merge with (or delete the node if it is empty), and then + * recurse up to the next level. + */ + level++; + parent = xfs_iext_find_level(ifp, offset, level); + pos = xfs_iext_node_pos(parent, offset); + + ASSERT(pos != KEYS_PER_NODE); + ASSERT(parent->ptrs[pos] == node); + + node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries); + if (node) { + victim = node; + node = parent; + goto again; + } + } else if (nr_entries == 1) { + /* + * If we are at the root and only one entry is left we can just + * free this node and update the root pointer. + */ + ASSERT(node == ifp->if_u1.if_root); + ifp->if_u1.if_root = node->ptrs[0]; + ifp->if_height--; + kmem_free(node); + } +} + +static void +xfs_iext_rebalance_leaf( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, + struct xfs_iext_leaf *leaf, + xfs_fileoff_t offset, + int nr_entries) +{ + /* + * If the neighbouring nodes are completely full we might never be able + * to merge our node, and will only delete it once the number of + * entries hits zero. + */ + if (nr_entries == 0) + goto remove_node; + + if (leaf->prev) { + int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i; + + if (nr_prev + nr_entries <= RECS_PER_LEAF) { + for (i = 0; i < nr_entries; i++) + leaf->prev->recs[nr_prev + i] = leaf->recs[i]; + + if (cur->leaf == leaf) { + cur->leaf = leaf->prev; + cur->pos += nr_prev; + } + goto remove_node; + } + } + + if (leaf->next) { + int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i; + + if (nr_entries + nr_next <= RECS_PER_LEAF) { + /* + * Merge the next node into this node so that we don't + * have to do an additional update of the keys in the + * higher levels. + */ + for (i = 0; i < nr_next; i++) { + leaf->recs[nr_entries + i] = + leaf->next->recs[i]; + } + + if (cur->leaf == leaf->next) { + cur->leaf = leaf; + cur->pos += nr_entries; + } + + offset = xfs_iext_leaf_key(leaf->next, 0); + leaf = leaf->next; + goto remove_node; + } + } + + return; +remove_node: + if (leaf->prev) + leaf->prev->next = leaf->next; + if (leaf->next) + leaf->next->prev = leaf->prev; + xfs_iext_remove_node(ifp, offset, leaf); +} + +static void +xfs_iext_free_last_leaf( + struct xfs_ifork *ifp) +{ + ifp->if_height--; + kmem_free(ifp->if_u1.if_root); + ifp->if_u1.if_root = NULL; +} + +void +xfs_iext_remove( + struct xfs_inode *ip, + struct xfs_iext_cursor *cur, + int state) +{ + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + struct xfs_iext_leaf *leaf = cur->leaf; + xfs_fileoff_t offset = xfs_iext_leaf_key(leaf, 0); + int i, nr_entries; + + trace_xfs_iext_remove(ip, cur, state, _RET_IP_); + + ASSERT(ifp->if_height > 0); + ASSERT(ifp->if_u1.if_root != NULL); + ASSERT(xfs_iext_valid(ifp, cur)); + + nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; + for (i = cur->pos; i < nr_entries; i++) + leaf->recs[i] = leaf->recs[i + 1]; + xfs_iext_rec_clear(&leaf->recs[nr_entries]); + ifp->if_bytes -= sizeof(struct xfs_iext_rec); + + if (cur->pos == 0 && nr_entries > 0) { + xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1, + leaf); + offset = xfs_iext_leaf_key(leaf, 0); + } else if (cur->pos == nr_entries) { + if (ifp->if_height > 1 && leaf->next) + cur->leaf = leaf->next; + else + cur->leaf = NULL; + cur->pos = 0; + } + + if (nr_entries >= RECS_PER_LEAF / 2) + return; + + if (ifp->if_height > 1) + xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries); + else if (nr_entries == 0) + xfs_iext_free_last_leaf(ifp); +} + +/* + * Lookup the extent covering bno. + * + * If there is an extent covering bno return the extent index, and store the + * expanded extent structure in *gotp, and the extent cursor in *cur. + * If there is no extent covering bno, but there is an extent after it (e.g. + * it lies in a hole) return that extent in *gotp and its cursor in *cur + * instead. + * If bno is beyond the last extent return false, and return an invalid + * cursor value. + */ +bool +xfs_iext_lookup_extent( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t offset, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *gotp) +{ + XFS_STATS_INC(ip->i_mount, xs_look_exlist); + + cur->leaf = xfs_iext_find_level(ifp, offset, 1); + if (!cur->leaf) { + cur->pos = 0; + return false; + } + + for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) { + struct xfs_iext_rec *rec = cur_rec(cur); + + if (xfs_iext_rec_is_empty(rec)) + break; + if (xfs_iext_rec_cmp(rec, offset) >= 0) + goto found; + } + + /* Try looking in the next node for an entry > offset */ + if (ifp->if_height == 1 || !cur->leaf->next) + return false; + cur->leaf = cur->leaf->next; + cur->pos = 0; + if (!xfs_iext_valid(ifp, cur)) + return false; +found: + xfs_iext_get(gotp, cur_rec(cur)); + return true; +} + +/* + * Returns the last extent before end, and if this extent doesn't cover + * end, update end to the end of the extent. + */ +bool +xfs_iext_lookup_extent_before( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t *end, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *gotp) +{ + /* could be optimized to not even look up the next on a match.. */ + if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) && + gotp->br_startoff <= *end - 1) + return true; + if (!xfs_iext_prev_extent(ifp, cur, gotp)) + return false; + *end = gotp->br_startoff + gotp->br_blockcount; + return true; +} + +void +xfs_iext_update_extent( + struct xfs_inode *ip, + int state, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *new) +{ + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + + if (cur->pos == 0) { + struct xfs_bmbt_irec old; + + xfs_iext_get(&old, cur_rec(cur)); + if (new->br_startoff != old.br_startoff) { + xfs_iext_update_node(ifp, old.br_startoff, + new->br_startoff, 1, cur->leaf); + } + } + + trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_); + xfs_iext_set(cur_rec(cur), new); + trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_); +} + +/* + * Return true if the cursor points at an extent and return the extent structure + * in gotp. Else return false. + */ +bool +xfs_iext_get_extent( + struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *gotp) +{ + if (!xfs_iext_valid(ifp, cur)) + return false; + xfs_iext_get(gotp, cur_rec(cur)); + return true; +} + +/* + * This is a recursive function, because of that we need to be extremely + * careful with stack usage. + */ +static void +xfs_iext_destroy_node( + struct xfs_iext_node *node, + int level) +{ + int i; + + if (level > 1) { + for (i = 0; i < KEYS_PER_NODE; i++) { + if (node->keys[i] == XFS_IEXT_KEY_INVALID) + break; + xfs_iext_destroy_node(node->ptrs[i], level - 1); + } + } + + kmem_free(node); +} + +void +xfs_iext_destroy( + struct xfs_ifork *ifp) +{ + xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height); + + ifp->if_bytes = 0; + ifp->if_height = 0; + ifp->if_u1.if_root = NULL; +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 378f8fbc91a7..ef68b1de006a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -24,6 +24,7 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_icache.h" @@ -31,6 +32,8 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" +#include <linux/iversion.h> + /* * Check that none of the inode's in the buffer have a next * unlinked field of 0. @@ -90,20 +93,26 @@ xfs_inode_buf_verify( bool readahead) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_agnumber_t agno; int i; int ni; /* * Validate the magic number and version of every inode in the buffer */ + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; + xfs_agino_t unlinked_ino; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); + unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && - xfs_dinode_good_version(mp, dip->di_version); + xfs_dinode_good_version(mp, dip->di_version) && + (unlinked_ino == NULLAGINO || + xfs_verify_agino(mp, agno, unlinked_ino)); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { @@ -112,17 +121,18 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)bp->b_bn, i, be16_to_cpu(dip->di_magic)); #endif + xfs_buf_verifier_error(bp, -EFSCORRUPTED, + __func__, dip, sizeof(*dip), + NULL); + return; } } - xfs_inobp_check(mp, bp); } @@ -263,7 +273,8 @@ xfs_inode_from_disk( to->di_flags = be16_to_cpu(from->di_flags); if (to->di_version == 3) { - inode->i_version = be64_to_cpu(from->di_changecount); + inode_set_iversion_queried(inode, + be64_to_cpu(from->di_changecount)); to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); @@ -313,7 +324,7 @@ xfs_inode_to_disk( to->di_flags = cpu_to_be16(from->di_flags); if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(inode->i_version); + to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); @@ -380,7 +391,7 @@ xfs_log_dinode_to_disk( } } -bool +xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, @@ -389,53 +400,122 @@ xfs_dinode_verify( uint16_t mode; uint16_t flags; uint64_t flags2; + uint64_t di_size; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) - return false; + return __this_address; + + /* Verify v3 integrity information first */ + if (dip->di_version >= 3) { + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + return __this_address; + if (be64_to_cpu(dip->di_ino) != ino) + return __this_address; + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + } /* don't allow invalid i_size */ - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) - return false; + di_size = be64_to_cpu(dip->di_size); + if (di_size & (1ULL << 63)) + return __this_address; mode = be16_to_cpu(dip->di_mode); if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) - return false; + return __this_address; /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) - return false; + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) + return __this_address; + + /* Fork checks carried over from xfs_iformat_fork */ + if (mode && + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks)) + return __this_address; + + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) + return __this_address; + + flags = be16_to_cpu(dip->di_flags); + + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) + return __this_address; + + /* Do we have appropriate data fork formats for the mode? */ + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (dip->di_format != XFS_DINODE_FMT_DEV) + return __this_address; + break; + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (dip->di_format) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (S_ISREG(mode)) + return __this_address; + if (di_size > XFS_DFORK_DSIZE(dip, mp)) + return __this_address; + /* fall through */ + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + break; + case 0: + /* Uninitialized inode ok. */ + break; + default: + return __this_address; + } + + if (XFS_DFORK_Q(dip)) { + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + } /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) - return true; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF)) - return false; - if (be64_to_cpu(dip->di_ino) != ino) - return false; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return NULL; - flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && !xfs_sb_version_hasreflink(&mp->m_sb)) - return false; + return __this_address; + + /* only regular files get reflink */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) + return __this_address; /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) - return false; + return __this_address; /* don't let reflink and dax mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return false; + return __this_address; - return true; + return NULL; } void @@ -475,6 +555,7 @@ xfs_iread( { xfs_buf_t *bp; xfs_dinode_t *dip; + xfs_failaddr_t fa; int error; /* @@ -491,10 +572,7 @@ xfs_iread( /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); VFS_I(ip)->i_generation = prandom_u32(); - if (xfs_sb_version_hascrc(&mp->m_sb)) - ip->i_d.di_version = 3; - else - ip->i_d.di_version = 2; + ip->i_d.di_version = 3; return 0; } @@ -506,11 +584,10 @@ xfs_iread( return error; /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld", - __func__, ip->i_ino); - - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); + fa = xfs_dinode_verify(mp, ip->i_ino, dip); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, + sizeof(*dip), fa); error = -EFSCORRUPTED; goto out_brelse; } @@ -577,3 +654,108 @@ xfs_iread( xfs_trans_brelse(tp, bp); return error; } + +/* + * Validate di_extsize hint. + * + * The rules are documented at xfs_ioctl_setattr_check_extsize(). + * These functions must be kept in sync with each other. + */ +xfs_failaddr_t +xfs_inode_validate_extsize( + struct xfs_mount *mp, + uint32_t extsize, + uint16_t mode, + uint16_t flags) +{ + bool rt_flag; + bool hint_flag; + bool inherit_flag; + uint32_t extsize_bytes; + uint32_t blocksize_bytes; + + rt_flag = (flags & XFS_DIFLAG_REALTIME); + hint_flag = (flags & XFS_DIFLAG_EXTSIZE); + inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); + extsize_bytes = XFS_FSB_TO_B(mp, extsize); + + if (rt_flag) + blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + else + blocksize_bytes = mp->m_sb.sb_blocksize; + + if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) + return __this_address; + + if (hint_flag && !S_ISREG(mode)) + return __this_address; + + if (inherit_flag && !S_ISDIR(mode)) + return __this_address; + + if ((hint_flag || inherit_flag) && extsize == 0) + return __this_address; + + if (!(hint_flag || inherit_flag) && extsize != 0) + return __this_address; + + if (extsize_bytes % blocksize_bytes) + return __this_address; + + if (extsize > MAXEXTLEN) + return __this_address; + + if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) + return __this_address; + + return NULL; +} + +/* + * Validate di_cowextsize hint. + * + * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). + * These functions must be kept in sync with each other. + */ +xfs_failaddr_t +xfs_inode_validate_cowextsize( + struct xfs_mount *mp, + uint32_t cowextsize, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + bool rt_flag; + bool hint_flag; + uint32_t cowextsize_bytes; + + rt_flag = (flags & XFS_DIFLAG_REALTIME); + hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); + cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); + + if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) + return __this_address; + + if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) + return __this_address; + + if (hint_flag && cowextsize == 0) + return __this_address; + + if (!hint_flag && cowextsize != 0) + return __this_address; + + if (hint_flag && rt_flag) + return __this_address; + + if (cowextsize_bytes % mp->m_sb.sb_blocksize) + return __this_address; + + if (cowextsize > MAXEXTLEN) + return __this_address; + + if (cowextsize > mp->m_sb.sb_agblocks / 2) + return __this_address; + + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index a9c97a356c30..d9a376a78ee2 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -82,7 +82,12 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, - struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_dinode *dip); +xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, + uint32_t extsize, uint16_t mode, uint16_t flags); +xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, + uint32_t cowextsize, uint16_t mode, uint16_t flags, + uint64_t flags2); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 31840ca24018..701c42a28d05 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -35,6 +35,8 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; @@ -43,90 +45,32 @@ STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); /* - * Move inode type and inode format specific information from the - * on-disk inode to the in-core inode. For fifos, devs, and sockets - * this means set if_rdev to the proper value. For files, directories, - * and symlinks this means to bring in the in-line data or extent - * pointers. For a file in B-tree format, only the root is immediately - * brought in-core. The rest will be in-lined in if_extents when it - * is first referenced (see xfs_iread_extents()). + * Copy inode type and data and attr format specific information from the + * on-disk inode to the in-core inode and fork structures. For fifos, devices, + * and sockets this means set i_rdev to the proper value. For files, + * directories, and symlinks this means to bring in the in-line data or extent + * pointers as well as the attribute fork. For a fork in B-tree format, only + * the root is immediately brought in-core. The rest will be read in later when + * first referenced (see xfs_iread_extents()). */ int xfs_iformat_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip) + struct xfs_inode *ip, + struct xfs_dinode *dip) { - xfs_attr_shortform_t *atp; + struct inode *inode = VFS_I(ip); + struct xfs_attr_shortform *atp; int size; int error = 0; xfs_fsize_t di_size; - if (unlikely(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks))) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", - (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents)), - (unsigned long long) - be64_to_cpu(dip->di_nblocks)); - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", - (unsigned long long)ip->i_ino, - dip->di_forkoff); - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && - !ip->i_mount->m_rtdev_targp)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, has realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && - (VFS_I(ip)->i_mode & S_IFMT) != S_IFREG)) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, wrong file type for reflink.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, has reflink+realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - switch (VFS_I(ip)->i_mode & S_IFMT) { + switch (inode->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } ip->i_d.di_size = 0; - ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); + inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); break; case S_IFREG: @@ -134,32 +78,7 @@ xfs_iformat_fork( case S_IFDIR: switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (local format for regular file).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(4)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - di_size = be64_to_cpu(dip->di_size); - if (unlikely(di_size < 0 || - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %Ld for local inode).", - (unsigned long long) ip->i_ino, - (long long) di_size); - XFS_CORRUPTION_ERROR("xfs_iformat(5)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - size = (int)di_size; error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); break; @@ -170,29 +89,16 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); break; default: - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, - ip->i_mount); return -EFSCORRUPTED; } break; default: - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); return -EFSCORRUPTED; } if (error) return error; - /* Check inline dir contents. */ - if (S_ISDIR(VFS_I(ip)->i_mode) && - dip->di_format == XFS_DINODE_FMT_LOCAL) { - error = xfs_dir2_sf_verify(ip); - if (error) { - xfs_idestroy_fork(ip, XFS_DATA_FORK); - return error; - } - } - if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -209,18 +115,6 @@ xfs_iformat_fork( atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad attr fork size %Ld).", - (unsigned long long) ip->i_ino, - (long long) size); - XFS_CORRUPTION_ERROR("xfs_iformat(8)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - error = -EFSCORRUPTED; - break; - } - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: @@ -265,19 +159,14 @@ xfs_init_local_fork( if (zero_terminate) mem_size++; - if (size == 0) - ifp->if_u1.if_data = NULL; - else if (mem_size <= sizeof(ifp->if_u2.if_inline_data)) - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - else { + if (size) { real_size = roundup(mem_size, 4); ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); - } - - if (size) { memcpy(ifp->if_u1.if_data, data, size); if (zero_terminate) ifp->if_u1.if_data[size] = '\0'; + } else { + ifp->if_u1.if_data = NULL; } ifp->if_bytes = size; @@ -288,13 +177,6 @@ xfs_init_local_fork( /* * The file is in-lined in the on-disk inode. - * If it fits into if_inline_data, then copy - * it there, otherwise allocate a buffer for it - * and copy the data there. Either way, set - * if_data to point at the data. - * If we allocate a buffer for the data, make - * sure that its size is a multiple of 4 and - * record the real size in i_real_bytes. */ STATIC int xfs_iformat_local( @@ -313,8 +195,9 @@ xfs_iformat_local( "corrupt inode %Lu (bad size %d for local fork, size = %d).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); - XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_local", dip, sizeof(*dip), + __this_address); return -EFSCORRUPTED; } @@ -324,9 +207,7 @@ xfs_iformat_local( /* * The file consists of a set of extents all of which fit into the on-disk - * inode. If there are few enough extents to fit into the if_inline_ext, then - * copy them there. Otherwise allocate a buffer for them and copy them into it. - * Either way, set if_extents to point at the extents. + * inode. */ STATIC int xfs_iformat_extents( @@ -336,9 +217,12 @@ xfs_iformat_extents( { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int state = xfs_bmap_fork_to_state(whichfork); int nex = XFS_DFORK_NEXTENTS(dip, whichfork); int size = nex * sizeof(xfs_bmbt_rec_t); + struct xfs_iext_cursor icur; struct xfs_bmbt_rec *dp; + struct xfs_bmbt_irec new; int i; /* @@ -348,33 +232,36 @@ xfs_iformat_extents( if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); - XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, - mp, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_extents(1)", dip, sizeof(*dip), + __this_address); return -EFSCORRUPTED; } ifp->if_real_bytes = 0; - if (nex == 0) - ifp->if_u1.if_extents = NULL; - else if (nex <= XFS_INLINE_EXTS) - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - else - xfs_iext_add(ifp, 0, nex); - - ifp->if_bytes = size; + ifp->if_bytes = 0; + ifp->if_u1.if_root = NULL; + ifp->if_height = 0; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); + + xfs_iext_first(ifp, &icur); for (i = 0; i < nex; i++, dp++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - ep->l0 = get_unaligned_be64(&dp->l0); - ep->l1 = get_unaligned_be64(&dp->l1); - if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) { - XFS_ERROR_REPORT("xfs_iformat_extents(2)", - XFS_ERRLEVEL_LOW, mp); + xfs_failaddr_t fa; + + xfs_bmbt_disk_get_all(dp, &new); + fa = xfs_bmap_validate_extent(ip, whichfork, &new); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_extents(2)", + dp, sizeof(*dp), fa); return -EFSCORRUPTED; } + + xfs_iext_insert(ip, &icur, &new, state); + trace_xfs_read_extent(ip, &icur, state, _THIS_IP_); + xfs_iext_next(ifp, &icur); } - XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); } ifp->if_flags |= XFS_IFEXTENTS; return 0; @@ -417,14 +304,16 @@ xfs_iformat_btree( */ if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= XFS_IFORK_MAXEXT(ip, whichfork) || + nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, - mp, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, + "xfs_iformat_btree", dfp, size, + __this_address); return -EFSCORRUPTED; } @@ -440,47 +329,14 @@ xfs_iformat_btree( ifp->if_flags &= ~XFS_IFEXTENTS; ifp->if_flags |= XFS_IFBROOT; + ifp->if_real_bytes = 0; + ifp->if_bytes = 0; + ifp->if_u1.if_root = NULL; + ifp->if_height = 0; return 0; } /* - * Read in extents from a btree-format inode. - * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. - */ -int -xfs_iread_extents( - xfs_trans_t *tp, - xfs_inode_t *ip, - int whichfork) -{ - int error; - xfs_ifork_t *ifp; - xfs_extnum_t nextents; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { - XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, - ip->i_mount); - return -EFSCORRUPTED; - } - nextents = XFS_IFORK_NEXTENTS(ip, whichfork); - ifp = XFS_IFORK_PTR(ip, whichfork); - - /* - * We know that the size is valid (it's checked in iformat_btree) - */ - ifp->if_bytes = ifp->if_real_bytes = 0; - xfs_iext_add(ifp, 0, nextents); - error = xfs_bmap_read_extents(tp, ip, whichfork); - if (error) { - xfs_iext_destroy(ifp); - return error; - } - ifp->if_flags |= XFS_IFEXTENTS; - return 0; -} -/* * Reallocate the space for if_broot based on the number of records * being added or deleted as indicated in rec_diff. Move the records * and pointers in if_broot to fit the new size. When shrinking this @@ -644,26 +500,9 @@ xfs_idata_realloc( ASSERT(new_size >= 0); if (new_size == 0) { - if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - kmem_free(ifp->if_u1.if_data); - } + kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = NULL; real_size = 0; - } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { - /* - * If the valid extents/data can fit in if_inline_ext/data, - * copy them from the malloc'd vector and free it. - */ - if (ifp->if_u1.if_data == NULL) { - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - ASSERT(ifp->if_real_bytes != 0); - memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, - new_size); - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = ifp->if_u2.if_inline_data; - } - real_size = 0; } else { /* * Stuck with malloc/realloc. @@ -677,7 +516,7 @@ xfs_idata_realloc( ASSERT(ifp->if_real_bytes == 0); ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); - } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + } else { /* * Only do the realloc if the underlying size * is really changing. @@ -688,12 +527,6 @@ xfs_idata_realloc( real_size, KM_SLEEP | KM_NOFS); } - } else { - ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, - KM_SLEEP | KM_NOFS); - memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, - ifp->if_bytes); } } ifp->if_real_bytes = real_size; @@ -721,23 +554,18 @@ xfs_idestroy_fork( * so check and free it up if we do. */ if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && - (ifp->if_u1.if_data != NULL)) { + if (ifp->if_u1.if_data != NULL) { ASSERT(ifp->if_real_bytes != 0); kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = NULL; ifp->if_real_bytes = 0; } - } else if ((ifp->if_flags & XFS_IFEXTENTS) && - ((ifp->if_flags & XFS_IFEXTIREC) || - ((ifp->if_u1.if_extents != NULL) && - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { - ASSERT(ifp->if_real_bytes != 0); + } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { xfs_iext_destroy(ifp); } - ASSERT(ifp->if_u1.if_extents == NULL || - ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); + ASSERT(ifp->if_real_bytes == 0); + if (whichfork == XFS_ATTR_FORK) { kmem_zone_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; @@ -747,19 +575,9 @@ xfs_idestroy_fork( } } -/* Count number of incore extents based on if_bytes */ -xfs_extnum_t -xfs_iext_count(struct xfs_ifork *ifp) -{ - return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); -} - /* * Convert in-core extents to on-disk form * - * For either the data or attr fork in extent format, we need to endian convert - * the in-core extent as we place them into the on-disk inode. - * * In the case of the data fork, the in-core and on-disk fork sizes can be * different due to delayed allocation extents. We only copy on-disk extents * here, so callers must always use the physical fork size to determine the @@ -768,53 +586,32 @@ xfs_iext_count(struct xfs_ifork *ifp) */ int xfs_iextents_copy( - xfs_inode_t *ip, - xfs_bmbt_rec_t *dp, + struct xfs_inode *ip, + struct xfs_bmbt_rec *dp, int whichfork) { - int copied; - int i; - xfs_ifork_t *ifp; - int nrecs; - xfs_fsblock_t start_block; + int state = xfs_bmap_fork_to_state(whichfork); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec rec; + int copied = 0; - ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); - nrecs = xfs_iext_count(ifp); - XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); - ASSERT(nrecs > 0); - - /* - * There are some delayed allocation extents in the - * inode, so copy the extents one at a time and skip - * the delayed ones. There must be at least one - * non-delayed extent. - */ - copied = 0; - for (i = 0; i < nrecs; i++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - - ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep)); - - start_block = xfs_bmbt_get_startblock(ep); - if (isnullstartblock(start_block)) { - /* - * It's a delayed allocation extent, so skip it. - */ + for_each_xfs_iext(ifp, &icur, &rec) { + if (isnullstartblock(rec.br_startblock)) continue; - } - - /* Translate to on disk format */ - put_unaligned_be64(ep->l0, &dp->l0); - put_unaligned_be64(ep->l1, &dp->l1); + ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL); + xfs_bmbt_disk_set_all(dp, &rec); + trace_xfs_write_extent(ip, &icur, state, _RET_IP_); + copied += sizeof(struct xfs_bmbt_rec); dp++; - copied++; } - ASSERT(copied != 0); - return (copied * (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(copied > 0); + ASSERT(copied <= ifp->if_bytes); + return copied; } /* @@ -872,7 +669,6 @@ xfs_iflush_fork( !(iip->ili_fields & extflag[whichfork])); if ((iip->ili_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { - ASSERT(xfs_iext_get_ext(ifp, 0)); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); @@ -894,16 +690,8 @@ xfs_iflush_fork( case XFS_DINODE_FMT_DEV: if (iip->ili_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); - xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); - } - break; - - case XFS_DINODE_FMT_UUID: - if (iip->ili_fields & XFS_ILOG_UUID) { - ASSERT(whichfork == XFS_DATA_FORK); - memcpy(XFS_DFORK_DPTR(dip), - &ip->i_df.if_u2.if_uuid, - sizeof(uuid_t)); + xfs_dinode_put_rdev(dip, + linux_to_xfs_dev_t(VFS_I(ip)->i_rdev)); } break; @@ -913,33 +701,6 @@ xfs_iflush_fork( } } -/* - * Return a pointer to the extent record at file index idx. - */ -xfs_bmbt_rec_host_t * -xfs_iext_get_ext( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx) /* index of target extent */ -{ - ASSERT(idx >= 0); - ASSERT(idx < xfs_iext_count(ifp)); - - if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { - return ifp->if_u1.if_ext_irec->er_extbuf; - } else if (ifp->if_flags & XFS_IFEXTIREC) { - xfs_ext_irec_t *erp; /* irec pointer */ - int erp_idx = 0; /* irec index */ - xfs_extnum_t page_idx = idx; /* ext index in target list */ - - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); - return &erp->er_extbuf[page_idx]; - } else if (ifp->if_bytes) { - return &ifp->if_u1.if_extents[idx]; - } else { - return NULL; - } -} - /* Convert bmap state flags to an inode fork. */ struct xfs_ifork * xfs_iext_state_to_fork( @@ -954,1011 +715,6 @@ xfs_iext_state_to_fork( } /* - * Insert new item(s) into the extent records for incore inode - * fork 'ifp'. 'count' new items are inserted at index 'idx'. - */ -void -xfs_iext_insert( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* starting index of new items */ - xfs_extnum_t count, /* number of inserted items */ - xfs_bmbt_irec_t *new, /* items to insert */ - int state) /* type of extent conversion */ -{ - xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); - xfs_extnum_t i; /* extent record index */ - - trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); - - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - xfs_iext_add(ifp, idx, count); - for (i = idx; i < idx + count; i++, new++) - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); -} - -/* - * This is called when the amount of space required for incore file - * extents needs to be increased. The ext_diff parameter stores the - * number of new extents being added and the idx parameter contains - * the extent index where the new extents will be added. If the new - * extents are being appended, then we just need to (re)allocate and - * initialize the space. Otherwise, if the new extents are being - * inserted into the middle of the existing entries, a bit more work - * is required to make room for the new extents to be inserted. The - * caller is responsible for filling in the new extent entries upon - * return. - */ -void -xfs_iext_add( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin adding exts */ - int ext_diff) /* number of extents to add */ -{ - int byte_diff; /* new bytes being added */ - int new_size; /* size of extents after adding */ - xfs_extnum_t nextents; /* number of extents in file */ - - nextents = xfs_iext_count(ifp); - ASSERT((idx >= 0) && (idx <= nextents)); - byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); - new_size = ifp->if_bytes + byte_diff; - /* - * If the new number of extents (nextents + ext_diff) - * fits inside the inode, then continue to use the inline - * extent buffer. - */ - if (nextents + ext_diff <= XFS_INLINE_EXTS) { - if (idx < nextents) { - memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], - &ifp->if_u2.if_inline_ext[idx], - (nextents - idx) * sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); - } - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - ifp->if_real_bytes = 0; - } - /* - * Otherwise use a linear (direct) extent list. - * If the extents are currently inside the inode, - * xfs_iext_realloc_direct will switch us from - * inline to direct extent allocation mode. - */ - else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { - xfs_iext_realloc_direct(ifp, new_size); - if (idx < nextents) { - memmove(&ifp->if_u1.if_extents[idx + ext_diff], - &ifp->if_u1.if_extents[idx], - (nextents - idx) * sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); - } - } - /* Indirection array */ - else { - xfs_ext_irec_t *erp; - int erp_idx = 0; - int page_idx = idx; - - ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); - if (ifp->if_flags & XFS_IFEXTIREC) { - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); - } else { - xfs_iext_irec_init(ifp); - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = ifp->if_u1.if_ext_irec; - } - /* Extents fit in target extent page */ - if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { - if (page_idx < erp->er_extcount) { - memmove(&erp->er_extbuf[page_idx + ext_diff], - &erp->er_extbuf[page_idx], - (erp->er_extcount - page_idx) * - sizeof(xfs_bmbt_rec_t)); - memset(&erp->er_extbuf[page_idx], 0, byte_diff); - } - erp->er_extcount += ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - } - /* Insert a new extent page */ - else if (erp) { - xfs_iext_add_indirect_multi(ifp, - erp_idx, page_idx, ext_diff); - } - /* - * If extent(s) are being appended to the last page in - * the indirection array and the new extent(s) don't fit - * in the page, then erp is NULL and erp_idx is set to - * the next index needed in the indirection array. - */ - else { - uint count = ext_diff; - - while (count) { - erp = xfs_iext_irec_new(ifp, erp_idx); - erp->er_extcount = min(count, XFS_LINEAR_EXTS); - count -= erp->er_extcount; - if (count) - erp_idx++; - } - } - } - ifp->if_bytes = new_size; -} - -/* - * This is called when incore extents are being added to the indirection - * array and the new extents do not fit in the target extent list. The - * erp_idx parameter contains the irec index for the target extent list - * in the indirection array, and the idx parameter contains the extent - * index within the list. The number of extents being added is stored - * in the count parameter. - * - * |-------| |-------| - * | | | | idx - number of extents before idx - * | idx | | count | - * | | | | count - number of extents being inserted at idx - * |-------| |-------| - * | count | | nex2 | nex2 - number of extents after idx + count - * |-------| |-------| - */ -void -xfs_iext_add_indirect_multi( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx, /* target extent irec index */ - xfs_extnum_t idx, /* index within target list */ - int count) /* new extents being added */ -{ - int byte_diff; /* new bytes being added */ - xfs_ext_irec_t *erp; /* pointer to irec entry */ - xfs_extnum_t ext_diff; /* number of extents to add */ - xfs_extnum_t ext_cnt; /* new extents still needed */ - xfs_extnum_t nex2; /* extents after idx + count */ - xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ - int nlists; /* number of irec's (lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nex2 = erp->er_extcount - idx; - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - - /* - * Save second part of target extent list - * (all extents past */ - if (nex2) { - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); - memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); - erp->er_extcount -= nex2; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); - memset(&erp->er_extbuf[idx], 0, byte_diff); - } - - /* - * Add the new extents to the end of the target - * list, then allocate new irec record(s) and - * extent buffer(s) as needed to store the rest - * of the new extents. - */ - ext_cnt = count; - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); - if (ext_diff) { - erp->er_extcount += ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - ext_cnt -= ext_diff; - } - while (ext_cnt) { - erp_idx++; - erp = xfs_iext_irec_new(ifp, erp_idx); - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); - erp->er_extcount = ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); - ext_cnt -= ext_diff; - } - - /* Add nex2 extents back to indirection array */ - if (nex2) { - xfs_extnum_t ext_avail; - int i; - - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; - i = 0; - /* - * If nex2 extents fit in the current page, append - * nex2_ep after the new extents. - */ - if (nex2 <= ext_avail) { - i = erp->er_extcount; - } - /* - * Otherwise, check if space is available in the - * next page. - */ - else if ((erp_idx < nlists - 1) && - (nex2 <= (ext_avail = XFS_LINEAR_EXTS - - ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { - erp_idx++; - erp++; - /* Create a hole for nex2 extents */ - memmove(&erp->er_extbuf[nex2], erp->er_extbuf, - erp->er_extcount * sizeof(xfs_bmbt_rec_t)); - } - /* - * Final choice, create a new extent page for - * nex2 extents. - */ - else { - erp_idx++; - erp = xfs_iext_irec_new(ifp, erp_idx); - } - memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); - kmem_free(nex2_ep); - erp->er_extcount += nex2; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); - } -} - -/* - * This is called when the amount of space required for incore file - * extents needs to be decreased. The ext_diff parameter stores the - * number of extents to be removed and the idx parameter contains - * the extent index where the extents will be removed from. - * - * If the amount of space needed has decreased below the linear - * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous - * extent array. Otherwise, use kmem_realloc() to adjust the - * size to what is needed. - */ -void -xfs_iext_remove( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff, /* number of extents to remove */ - int state) /* type of extent conversion */ -{ - xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); - xfs_extnum_t nextents; /* number of extents in file */ - int new_size; /* size of extents after removal */ - - trace_xfs_iext_remove(ip, idx, state, _RET_IP_); - - ASSERT(ext_diff > 0); - nextents = xfs_iext_count(ifp); - new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); - - if (new_size == 0) { - xfs_iext_destroy(ifp); - } else if (ifp->if_flags & XFS_IFEXTIREC) { - xfs_iext_remove_indirect(ifp, idx, ext_diff); - } else if (ifp->if_real_bytes) { - xfs_iext_remove_direct(ifp, idx, ext_diff); - } else { - xfs_iext_remove_inline(ifp, idx, ext_diff); - } - ifp->if_bytes = new_size; -} - -/* - * This removes ext_diff extents from the inline buffer, beginning - * at extent index idx. - */ -void -xfs_iext_remove_inline( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff) /* number of extents to remove */ -{ - int nextents; /* number of extents in file */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - ASSERT(idx < XFS_INLINE_EXTS); - nextents = xfs_iext_count(ifp); - ASSERT(((nextents - ext_diff) > 0) && - (nextents - ext_diff) < XFS_INLINE_EXTS); - - if (idx + ext_diff < nextents) { - memmove(&ifp->if_u2.if_inline_ext[idx], - &ifp->if_u2.if_inline_ext[idx + ext_diff], - (nextents - (idx + ext_diff)) * - sizeof(xfs_bmbt_rec_t)); - memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], - 0, ext_diff * sizeof(xfs_bmbt_rec_t)); - } else { - memset(&ifp->if_u2.if_inline_ext[idx], 0, - ext_diff * sizeof(xfs_bmbt_rec_t)); - } -} - -/* - * This removes ext_diff extents from a linear (direct) extent list, - * beginning at extent index idx. If the extents are being removed - * from the end of the list (ie. truncate) then we just need to re- - * allocate the list to remove the extra space. Otherwise, if the - * extents are being removed from the middle of the existing extent - * entries, then we first need to move the extent records beginning - * at idx + ext_diff up in the list to overwrite the records being - * removed, then remove the extra space via kmem_realloc. - */ -void -xfs_iext_remove_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing exts */ - int ext_diff) /* number of extents to remove */ -{ - xfs_extnum_t nextents; /* number of extents in file */ - int new_size; /* size of extents after removal */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - new_size = ifp->if_bytes - - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = xfs_iext_count(ifp); - - if (new_size == 0) { - xfs_iext_destroy(ifp); - return; - } - /* Move extents up in the list (if needed) */ - if (idx + ext_diff < nextents) { - memmove(&ifp->if_u1.if_extents[idx], - &ifp->if_u1.if_extents[idx + ext_diff], - (nextents - (idx + ext_diff)) * - sizeof(xfs_bmbt_rec_t)); - } - memset(&ifp->if_u1.if_extents[nextents - ext_diff], - 0, ext_diff * sizeof(xfs_bmbt_rec_t)); - /* - * Reallocate the direct extent list. If the extents - * will fit inside the inode then xfs_iext_realloc_direct - * will switch from direct to inline extent allocation - * mode for us. - */ - xfs_iext_realloc_direct(ifp, new_size); - ifp->if_bytes = new_size; -} - -/* - * This is called when incore extents are being removed from the - * indirection array and the extents being removed span multiple extent - * buffers. The idx parameter contains the file extent index where we - * want to begin removing extents, and the count parameter contains - * how many extents need to be removed. - * - * |-------| |-------| - * | nex1 | | | nex1 - number of extents before idx - * |-------| | count | - * | | | | count - number of extents being removed at idx - * | count | |-------| - * | | | nex2 | nex2 - number of extents after idx + count - * |-------| |-------| - */ -void -xfs_iext_remove_indirect( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t idx, /* index to begin removing extents */ - int count) /* number of extents to remove */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int erp_idx = 0; /* indirection array index */ - xfs_extnum_t ext_cnt; /* extents left to remove */ - xfs_extnum_t ext_diff; /* extents to remove in current list */ - xfs_extnum_t nex1; /* number of extents before idx */ - xfs_extnum_t nex2; /* extents after idx + count */ - int page_idx = idx; /* index in target extent list */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); - ASSERT(erp != NULL); - nex1 = page_idx; - ext_cnt = count; - while (ext_cnt) { - nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); - ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); - /* - * Check for deletion of entire list; - * xfs_iext_irec_remove() updates extent offsets. - */ - if (ext_diff == erp->er_extcount) { - xfs_iext_irec_remove(ifp, erp_idx); - ext_cnt -= ext_diff; - nex1 = 0; - if (ext_cnt) { - ASSERT(erp_idx < ifp->if_real_bytes / - XFS_IEXT_BUFSZ); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nex1 = 0; - continue; - } else { - break; - } - } - /* Move extents up (if needed) */ - if (nex2) { - memmove(&erp->er_extbuf[nex1], - &erp->er_extbuf[nex1 + ext_diff], - nex2 * sizeof(xfs_bmbt_rec_t)); - } - /* Zero out rest of page */ - memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - - ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); - /* Update remaining counters */ - erp->er_extcount -= ext_diff; - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); - ext_cnt -= ext_diff; - nex1 = 0; - erp_idx++; - erp++; - } - ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); - xfs_iext_irec_compact(ifp); -} - -/* - * Create, destroy, or resize a linear (direct) block of extents. - */ -void -xfs_iext_realloc_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new size of extents after adding */ -{ - int rnew_size; /* real new size of extents */ - - rnew_size = new_size; - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || - ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && - (new_size != ifp->if_real_bytes))); - - /* Free extent records */ - if (new_size == 0) { - xfs_iext_destroy(ifp); - } - /* Resize direct extent list and zero any new bytes */ - else if (ifp->if_real_bytes) { - /* Check if extents will fit inside the inode */ - if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { - xfs_iext_direct_to_inline(ifp, new_size / - (uint)sizeof(xfs_bmbt_rec_t)); - ifp->if_bytes = new_size; - return; - } - if (!is_power_of_2(new_size)){ - rnew_size = roundup_pow_of_two(new_size); - } - if (rnew_size != ifp->if_real_bytes) { - ifp->if_u1.if_extents = - kmem_realloc(ifp->if_u1.if_extents, - rnew_size, KM_NOFS); - } - if (rnew_size > ifp->if_real_bytes) { - memset(&ifp->if_u1.if_extents[ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t)], 0, - rnew_size - ifp->if_real_bytes); - } - } - /* Switch from the inline extent buffer to a direct extent list */ - else { - if (!is_power_of_2(new_size)) { - rnew_size = roundup_pow_of_two(new_size); - } - xfs_iext_inline_to_direct(ifp, rnew_size); - } - ifp->if_real_bytes = rnew_size; - ifp->if_bytes = new_size; -} - -/* - * Switch from linear (direct) extent records to inline buffer. - */ -void -xfs_iext_direct_to_inline( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t nextents) /* number of extents in file */ -{ - ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ASSERT(nextents <= XFS_INLINE_EXTS); - /* - * The inline buffer was zeroed when we switched - * from inline to direct extent allocation mode, - * so we don't need to clear it here. - */ - memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, - nextents * sizeof(xfs_bmbt_rec_t)); - kmem_free(ifp->if_u1.if_extents); - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - ifp->if_real_bytes = 0; -} - -/* - * Switch from inline buffer to linear (direct) extent records. - * new_size should already be rounded up to the next power of 2 - * by the caller (when appropriate), so use new_size as it is. - * However, since new_size may be rounded up, we can't update - * if_bytes here. It is the caller's responsibility to update - * if_bytes upon return. - */ -void -xfs_iext_inline_to_direct( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* number of extents in file */ -{ - ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); - memset(ifp->if_u1.if_extents, 0, new_size); - if (ifp->if_bytes) { - memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, - ifp->if_bytes); - memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * - sizeof(xfs_bmbt_rec_t)); - } - ifp->if_real_bytes = new_size; -} - -/* - * Resize an extent indirection array to new_size bytes. - */ -STATIC void -xfs_iext_realloc_indirect( - xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new indirection array size */ -{ - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(ifp->if_real_bytes); - ASSERT((new_size >= 0) && - (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) * - sizeof(xfs_ext_irec_t)))); - if (new_size == 0) { - xfs_iext_destroy(ifp); - } else { - ifp->if_u1.if_ext_irec = - kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS); - } -} - -/* - * Switch from indirection array to linear (direct) extent allocations. - */ -STATIC void -xfs_iext_indirect_to_direct( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t nextents; /* number of extents in file */ - int size; /* size of file extents */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = xfs_iext_count(ifp); - ASSERT(nextents <= XFS_LINEAR_EXTS); - size = nextents * sizeof(xfs_bmbt_rec_t); - - xfs_iext_irec_compact_pages(ifp); - ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); - - ep = ifp->if_u1.if_ext_irec->er_extbuf; - kmem_free(ifp->if_u1.if_ext_irec); - ifp->if_flags &= ~XFS_IFEXTIREC; - ifp->if_u1.if_extents = ep; - ifp->if_bytes = size; - if (nextents < XFS_LINEAR_EXTS) { - xfs_iext_realloc_direct(ifp, size); - } -} - -/* - * Remove all records from the indirection array. - */ -STATIC void -xfs_iext_irec_remove_all( - struct xfs_ifork *ifp) -{ - int nlists; - int i; - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - for (i = 0; i < nlists; i++) - kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf); - kmem_free(ifp->if_u1.if_ext_irec); - ifp->if_flags &= ~XFS_IFEXTIREC; -} - -/* - * Free incore file extents. - */ -void -xfs_iext_destroy( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - if (ifp->if_flags & XFS_IFEXTIREC) { - xfs_iext_irec_remove_all(ifp); - } else if (ifp->if_real_bytes) { - kmem_free(ifp->if_u1.if_extents); - } else if (ifp->if_bytes) { - memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * - sizeof(xfs_bmbt_rec_t)); - } - ifp->if_u1.if_extents = NULL; - ifp->if_real_bytes = 0; - ifp->if_bytes = 0; -} - -/* - * Return a pointer to the extent record for file system block bno. - */ -xfs_bmbt_rec_host_t * /* pointer to found extent record */ -xfs_iext_bno_to_ext( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number to search for */ - xfs_extnum_t *idxp) /* index of target extent */ -{ - xfs_bmbt_rec_host_t *base; /* pointer to first extent */ - xfs_filblks_t blockcount = 0; /* number of blocks in extent */ - xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - int high; /* upper boundary in search */ - xfs_extnum_t idx = 0; /* index of target extent */ - int low; /* lower boundary in search */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_fileoff_t startoff = 0; /* start offset of extent */ - - nextents = xfs_iext_count(ifp); - if (nextents == 0) { - *idxp = 0; - return NULL; - } - low = 0; - if (ifp->if_flags & XFS_IFEXTIREC) { - /* Find target extent list */ - int erp_idx = 0; - erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); - base = erp->er_extbuf; - high = erp->er_extcount - 1; - } else { - base = ifp->if_u1.if_extents; - high = nextents - 1; - } - /* Binary search extent records */ - while (low <= high) { - idx = (low + high) >> 1; - ep = base + idx; - startoff = xfs_bmbt_get_startoff(ep); - blockcount = xfs_bmbt_get_blockcount(ep); - if (bno < startoff) { - high = idx - 1; - } else if (bno >= startoff + blockcount) { - low = idx + 1; - } else { - /* Convert back to file-based extent index */ - if (ifp->if_flags & XFS_IFEXTIREC) { - idx += erp->er_extoff; - } - *idxp = idx; - return ep; - } - } - /* Convert back to file-based extent index */ - if (ifp->if_flags & XFS_IFEXTIREC) { - idx += erp->er_extoff; - } - if (bno >= startoff + blockcount) { - if (++idx == nextents) { - ep = NULL; - } else { - ep = xfs_iext_get_ext(ifp, idx); - } - } - *idxp = idx; - return ep; -} - -/* - * Return a pointer to the indirection array entry containing the - * extent record for filesystem block bno. Store the index of the - * target irec in *erp_idxp. - */ -xfs_ext_irec_t * /* pointer to found extent record */ -xfs_iext_bno_to_irec( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number to search for */ - int *erp_idxp) /* irec index of target ext list */ -{ - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - xfs_ext_irec_t *erp_next; /* next indirection array entry */ - int erp_idx; /* indirection array index */ - int nlists; /* number of extent irec's (lists) */ - int high; /* binary search upper limit */ - int low; /* binary search lower limit */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp_idx = 0; - low = 0; - high = nlists - 1; - while (low <= high) { - erp_idx = (low + high) >> 1; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; - if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { - high = erp_idx - 1; - } else if (erp_next && bno >= - xfs_bmbt_get_startoff(erp_next->er_extbuf)) { - low = erp_idx + 1; - } else { - break; - } - } - *erp_idxp = erp_idx; - return erp; -} - -/* - * Return a pointer to the indirection array entry containing the - * extent record at file extent index *idxp. Store the index of the - * target irec in *erp_idxp and store the page index of the target - * extent record in *idxp. - */ -xfs_ext_irec_t * -xfs_iext_idx_to_irec( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_extnum_t *idxp, /* extent index (file -> page) */ - int *erp_idxp, /* pointer to target irec */ - int realloc) /* new bytes were just added */ -{ - xfs_ext_irec_t *prev; /* pointer to previous irec */ - xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ - int erp_idx; /* indirection array index */ - int nlists; /* number of irec's (ex lists) */ - int high; /* binary search upper limit */ - int low; /* binary search lower limit */ - xfs_extnum_t page_idx = *idxp; /* extent index in target list */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(page_idx >= 0); - ASSERT(page_idx <= xfs_iext_count(ifp)); - ASSERT(page_idx < xfs_iext_count(ifp) || realloc); - - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp_idx = 0; - low = 0; - high = nlists - 1; - - /* Binary search extent irec's */ - while (low <= high) { - erp_idx = (low + high) >> 1; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - prev = erp_idx > 0 ? erp - 1 : NULL; - if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && - realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { - high = erp_idx - 1; - } else if (page_idx > erp->er_extoff + erp->er_extcount || - (page_idx == erp->er_extoff + erp->er_extcount && - !realloc)) { - low = erp_idx + 1; - } else if (page_idx == erp->er_extoff + erp->er_extcount && - erp->er_extcount == XFS_LINEAR_EXTS) { - ASSERT(realloc); - page_idx = 0; - erp_idx++; - erp = erp_idx < nlists ? erp + 1 : NULL; - break; - } else { - page_idx -= erp->er_extoff; - break; - } - } - *idxp = page_idx; - *erp_idxp = erp_idx; - return erp; -} - -/* - * Allocate and initialize an indirection array once the space needed - * for incore extents increases above XFS_IEXT_BUFSZ. - */ -void -xfs_iext_irec_init( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - xfs_extnum_t nextents; /* number of extents in file */ - - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = xfs_iext_count(ifp); - ASSERT(nextents <= XFS_LINEAR_EXTS); - - erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); - - if (nextents == 0) { - ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); - } else if (!ifp->if_real_bytes) { - xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); - } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { - xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); - } - erp->er_extbuf = ifp->if_u1.if_extents; - erp->er_extcount = nextents; - erp->er_extoff = 0; - - ifp->if_flags |= XFS_IFEXTIREC; - ifp->if_real_bytes = XFS_IEXT_BUFSZ; - ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); - ifp->if_u1.if_ext_irec = erp; - - return; -} - -/* - * Allocate and initialize a new entry in the indirection array. - */ -xfs_ext_irec_t * -xfs_iext_irec_new( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx) /* index for new irec */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - - /* Resize indirection array */ - xfs_iext_realloc_indirect(ifp, ++nlists * - sizeof(xfs_ext_irec_t)); - /* - * Move records down in the array so the - * new page can use erp_idx. - */ - erp = ifp->if_u1.if_ext_irec; - for (i = nlists - 1; i > erp_idx; i--) { - memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); - } - ASSERT(i == erp_idx); - - /* Initialize new extent record */ - erp = ifp->if_u1.if_ext_irec; - erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; - memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); - erp[erp_idx].er_extcount = 0; - erp[erp_idx].er_extoff = erp_idx > 0 ? - erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; - return (&erp[erp_idx]); -} - -/* - * Remove a record from the indirection array. - */ -void -xfs_iext_irec_remove( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx) /* irec index to remove */ -{ - xfs_ext_irec_t *erp; /* indirection array pointer */ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - if (erp->er_extbuf) { - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, - -erp->er_extcount); - kmem_free(erp->er_extbuf); - } - /* Compact extent records */ - erp = ifp->if_u1.if_ext_irec; - for (i = erp_idx; i < nlists - 1; i++) { - memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); - } - /* - * Manually free the last extent record from the indirection - * array. A call to xfs_iext_realloc_indirect() with a size - * of zero would result in a call to xfs_iext_destroy() which - * would in turn call this function again, creating a nasty - * infinite loop. - */ - if (--nlists) { - xfs_iext_realloc_indirect(ifp, - nlists * sizeof(xfs_ext_irec_t)); - } else { - kmem_free(ifp->if_u1.if_ext_irec); - } - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; -} - -/* - * This is called to clean up large amounts of unused memory allocated - * by the indirection array. Before compacting anything though, verify - * that the indirection array is still needed and switch back to the - * linear extent list (or even the inline buffer) if possible. The - * compaction policy is as follows: - * - * Full Compaction: Extents fit into a single page (or inline buffer) - * Partial Compaction: Extents occupy less than 50% of allocated space - * No Compaction: Extents occupy at least 50% of allocated space - */ -void -xfs_iext_irec_compact( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_extnum_t nextents; /* number of extents in file */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = xfs_iext_count(ifp); - - if (nextents == 0) { - xfs_iext_destroy(ifp); - } else if (nextents <= XFS_INLINE_EXTS) { - xfs_iext_indirect_to_direct(ifp); - xfs_iext_direct_to_inline(ifp, nextents); - } else if (nextents <= XFS_LINEAR_EXTS) { - xfs_iext_indirect_to_direct(ifp); - } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { - xfs_iext_irec_compact_pages(ifp); - } -} - -/* - * Combine extents from neighboring extent pages. - */ -void -xfs_iext_irec_compact_pages( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ - int erp_idx = 0; /* indirection array index */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - while (erp_idx < nlists - 1) { - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - erp_next = erp + 1; - if (erp_next->er_extcount <= - (XFS_LINEAR_EXTS - erp->er_extcount)) { - memcpy(&erp->er_extbuf[erp->er_extcount], - erp_next->er_extbuf, erp_next->er_extcount * - sizeof(xfs_bmbt_rec_t)); - erp->er_extcount += erp_next->er_extcount; - /* - * Free page before removing extent record - * so er_extoffs don't get modified in - * xfs_iext_irec_remove. - */ - kmem_free(erp_next->er_extbuf); - erp_next->er_extbuf = NULL; - xfs_iext_irec_remove(ifp, erp_idx + 1); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - } else { - erp_idx++; - } - } -} - -/* - * This is called to update the er_extoff field in the indirection - * array when extents have been added or removed from one of the - * extent lists. erp_idx contains the irec index to begin updating - * at and ext_diff contains the number of extents that were added - * or removed. - */ -void -xfs_iext_irec_update_extoffs( - xfs_ifork_t *ifp, /* inode fork pointer */ - int erp_idx, /* irec index to update */ - int ext_diff) /* number of new extents */ -{ - int i; /* loop counter */ - int nlists; /* number of irec's (ex lists */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - for (i = erp_idx; i < nlists; i++) { - ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; - } -} - -/* * Initialize an inode's copy-on-write fork. */ void @@ -1975,60 +731,44 @@ xfs_ifork_init_cow( ip->i_cnextents = 0; } -/* - * Lookup the extent covering bno. - * - * If there is an extent covering bno return the extent index, and store the - * expanded extent structure in *gotp, and the extent index in *idx. - * If there is no extent covering bno, but there is an extent after it (e.g. - * it lies in a hole) return that extent in *gotp and its index in *idx - * instead. - * If bno is beyond the last extent return false, and return the index after - * the last valid index in *idxp. - */ -bool -xfs_iext_lookup_extent( +/* Default fork content verifiers. */ +struct xfs_ifork_ops xfs_default_ifork_ops = { + .verify_attr = xfs_attr_shortform_verify, + .verify_dir = xfs_dir2_sf_verify, + .verify_symlink = xfs_symlink_shortform_verify, +}; + +/* Verify the inline contents of the data fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_data( struct xfs_inode *ip, - struct xfs_ifork *ifp, - xfs_fileoff_t bno, - xfs_extnum_t *idxp, - struct xfs_bmbt_irec *gotp) + struct xfs_ifork_ops *ops) { - struct xfs_bmbt_rec_host *ep; - - XFS_STATS_INC(ip->i_mount, xs_look_exlist); - - ep = xfs_iext_bno_to_ext(ifp, bno, idxp); - if (!ep) - return false; - xfs_bmbt_get_all(ep, gotp); - return true; -} + /* Non-local data fork, we're done. */ + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return NULL; -/* - * Return true if there is an extent at index idx, and return the expanded - * extent structure at idx in that case. Else return false. - */ -bool -xfs_iext_get_extent( - struct xfs_ifork *ifp, - xfs_extnum_t idx, - struct xfs_bmbt_irec *gotp) -{ - if (idx < 0 || idx >= xfs_iext_count(ifp)) - return false; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); - return true; + /* Check the inline data fork if there is one. */ + switch (VFS_I(ip)->i_mode & S_IFMT) { + case S_IFDIR: + return ops->verify_dir(ip); + case S_IFLNK: + return ops->verify_symlink(ip); + default: + return NULL; + } } -void -xfs_iext_update_extent( - struct xfs_ifork *ifp, - xfs_extnum_t idx, - struct xfs_bmbt_irec *gotp) +/* Verify the inline contents of the attr fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_attr( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) { - ASSERT(idx >= 0); - ASSERT(idx < xfs_iext_count(ifp)); - - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp); + /* There has to be an attr fork allocated if aformat is local. */ + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) + return NULL; + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) + return __this_address; + return ops->verify_attr(ip); } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 11af705219f6..dd8aba0dd119 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -22,56 +22,19 @@ struct xfs_inode_log_item; struct xfs_dinode; /* - * The following xfs_ext_irec_t struct introduces a second (top) level - * to the in-core extent allocation scheme. These structs are allocated - * in a contiguous block, creating an indirection array where each entry - * (irec) contains a pointer to a buffer of in-core extent records which - * it manages. Each extent buffer is 4k in size, since 4k is the system - * page size on Linux i386 and systems with larger page sizes don't seem - * to gain much, if anything, by using their native page size as the - * extent buffer size. Also, using 4k extent buffers everywhere provides - * a consistent interface for CXFS across different platforms. - * - * There is currently no limit on the number of irec's (extent lists) - * allowed, so heavily fragmented files may require an indirection array - * which spans multiple system pages of memory. The number of extents - * which would require this amount of contiguous memory is very large - * and should not cause problems in the foreseeable future. However, - * if the memory needed for the contiguous array ever becomes a problem, - * it is possible that a third level of indirection may be required. - */ -typedef struct xfs_ext_irec { - xfs_bmbt_rec_host_t *er_extbuf; /* block of extent records */ - xfs_extnum_t er_extoff; /* extent offset in file */ - xfs_extnum_t er_extcount; /* number of extents in page/block */ -} xfs_ext_irec_t; - -/* * File incore extent information, present for each of data & attr forks. */ -#define XFS_IEXT_BUFSZ 4096 -#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t)) -#define XFS_INLINE_EXTS 2 -#define XFS_INLINE_DATA 32 typedef struct xfs_ifork { int if_bytes; /* bytes in if_u1 */ int if_real_bytes; /* bytes allocated in if_u1 */ struct xfs_btree_block *if_broot; /* file's incore btree root */ short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ + int if_height; /* height of the extent tree */ union { - xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ - xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ + void *if_root; /* extent tree root */ char *if_data; /* inline file data */ } if_u1; - union { - xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS]; - /* very small file extents */ - char if_inline_data[XFS_INLINE_DATA]; - /* very small file data */ - xfs_dev_t if_rdev; /* dev number if special */ - uuid_t if_uuid; /* mount point value */ - } if_u2; } xfs_ifork_t; /* @@ -80,7 +43,6 @@ typedef struct xfs_ifork { #define XFS_IFINLINE 0x01 /* Inline data is read in */ #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ -#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* * Fork handling. @@ -150,48 +112,92 @@ int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, int); void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); -struct xfs_bmbt_rec_host * - xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); -xfs_extnum_t xfs_iext_count(struct xfs_ifork *); -void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, - struct xfs_bmbt_irec *, int); -void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); -void xfs_iext_add_indirect_multi(struct xfs_ifork *, int, - xfs_extnum_t, int); -void xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int); -void xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int); -void xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int); -void xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int); -void xfs_iext_realloc_direct(struct xfs_ifork *, int); -void xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t); -void xfs_iext_inline_to_direct(struct xfs_ifork *, int); +xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp); +void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *, int); +void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *, + int); void xfs_iext_destroy(struct xfs_ifork *); -struct xfs_bmbt_rec_host * - xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *); -struct xfs_ext_irec * - xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *); -struct xfs_ext_irec * - xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *, - int); -void xfs_iext_irec_init(struct xfs_ifork *); -struct xfs_ext_irec * - xfs_iext_irec_new(struct xfs_ifork *, int); -void xfs_iext_irec_remove(struct xfs_ifork *, int); -void xfs_iext_irec_compact(struct xfs_ifork *); -void xfs_iext_irec_compact_pages(struct xfs_ifork *); -void xfs_iext_irec_compact_full(struct xfs_ifork *); -void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); bool xfs_iext_lookup_extent(struct xfs_inode *ip, struct xfs_ifork *ifp, xfs_fileoff_t bno, - xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); -bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *gotp); +bool xfs_iext_lookup_extent_before(struct xfs_inode *ip, + struct xfs_ifork *ifp, xfs_fileoff_t *end, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp); -void xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, +bool xfs_iext_get_extent(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp); +void xfs_iext_update_extent(struct xfs_inode *ip, int state, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *gotp); + +void xfs_iext_first(struct xfs_ifork *, struct xfs_iext_cursor *); +void xfs_iext_last(struct xfs_ifork *, struct xfs_iext_cursor *); +void xfs_iext_next(struct xfs_ifork *, struct xfs_iext_cursor *); +void xfs_iext_prev(struct xfs_ifork *, struct xfs_iext_cursor *); + +static inline bool xfs_iext_next_extent(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp) +{ + xfs_iext_next(ifp, cur); + return xfs_iext_get_extent(ifp, cur, gotp); +} + +static inline bool xfs_iext_prev_extent(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp) +{ + xfs_iext_prev(ifp, cur); + return xfs_iext_get_extent(ifp, cur, gotp); +} + +/* + * Return the extent after cur in gotp without updating the cursor. + */ +static inline bool xfs_iext_peek_next_extent(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp) +{ + struct xfs_iext_cursor ncur = *cur; + + xfs_iext_next(ifp, &ncur); + return xfs_iext_get_extent(ifp, &ncur, gotp); +} + +/* + * Return the extent before cur in gotp without updating the cursor. + */ +static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp) +{ + struct xfs_iext_cursor ncur = *cur; + + xfs_iext_prev(ifp, &ncur); + return xfs_iext_get_extent(ifp, &ncur, gotp); +} + +#define for_each_xfs_iext(ifp, ext, got) \ + for (xfs_iext_first((ifp), (ext)); \ + xfs_iext_get_extent((ifp), (ext), (got)); \ + xfs_iext_next((ifp), (ext))) extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); +typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); + +struct xfs_ifork_ops { + xfs_ifork_verifier_t verify_symlink; + xfs_ifork_verifier_t verify_dir; + xfs_ifork_verifier_t verify_attr; +}; +extern struct xfs_ifork_ops xfs_default_ifork_ops; + +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); + #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8372e9bcd7b6..349d9f8edb89 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -264,54 +264,43 @@ typedef struct xfs_trans_header { * (if any) is indicated in the ilf_dsize field. Changes to this structure * must be added on to the end. */ -typedef struct xfs_inode_log_format { - uint16_t ilf_type; /* inode log item type */ - uint16_t ilf_size; /* size of this item */ - uint32_t ilf_fields; /* flags for fields logged */ - uint16_t ilf_asize; /* size of attr d/ext/root */ - uint16_t ilf_dsize; /* size of data/ext/root */ - uint64_t ilf_ino; /* inode number */ - union { - uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ - } ilf_u; - int64_t ilf_blkno; /* blkno of inode buffer */ - int32_t ilf_len; /* len of inode buffer */ - int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_t; - -typedef struct xfs_inode_log_format_32 { +struct xfs_inode_log_format { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ + uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ + uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ -} __attribute__((packed)) xfs_inode_log_format_32_t; +}; -typedef struct xfs_inode_log_format_64 { +/* + * Old 32 bit systems will log in this format without the 64 bit + * alignment padding. Recovery will detect this and convert it to the + * correct format. + */ +struct xfs_inode_log_format_32 { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ - uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ + uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_64_t; +} __attribute__((packed)); /* @@ -322,7 +311,7 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */ #define XFS_ILOG_DBROOT 0x008 /* log i_df.i_broot */ #define XFS_ILOG_DEV 0x010 /* log the dev field */ -#define XFS_ILOG_UUID 0x020 /* log the uuid field */ +#define XFS_ILOG_UUID 0x020 /* added long ago, but never used */ #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ @@ -340,9 +329,9 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ - XFS_ILOG_UUID | XFS_ILOG_ADATA | \ - XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ - XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) + XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ + XFS_ILOG_ABROOT | XFS_ILOG_DOWNER | \ + XFS_ILOG_AOWNER) #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT) @@ -352,10 +341,10 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \ XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ - XFS_ILOG_DEV | XFS_ILOG_UUID | \ - XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ - XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \ - XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) + XFS_ILOG_DEV | XFS_ILOG_ADATA | \ + XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ + XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \ + XFS_ILOG_AOWNER) static inline int xfs_ilog_fbroot(int w) { diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index c10597973333..cc4cbe290939 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res( * the maximum one in terms of the pre-calculated values which were done * at mount time. */ -STATIC void +void xfs_log_get_max_trans_res( struct xfs_mount *mp, struct xfs_trans_res *max_resp) diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index d69c772271cb..bb1b13a9b5f4 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -112,8 +112,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ -#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ @@ -153,8 +151,11 @@ typedef uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) -extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, const char *str); +extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, + uint flags); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); +extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, + xfs_dqid_t id, uint type); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 9d5406b4f663..bee68c23d612 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -30,6 +30,7 @@ #include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" @@ -1487,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1520,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. */ @@ -1551,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1566,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } @@ -1709,3 +1696,22 @@ out_cursor: xfs_trans_brelse(tp, agbp); goto out_trans; } + +/* Is there a record covering a given extent? */ +int +xfs_refcount_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.rc.rc_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rc.rc_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index eafb9d1f3b37..2a731ac68fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; } +extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3c59dd3d58d7..265fdcefcbae 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -79,8 +79,6 @@ xfs_refcountbt_alloc_block( struct xfs_alloc_arg args; /* block allocation args */ int error; /* error return value */ - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; @@ -98,7 +96,6 @@ xfs_refcountbt_alloc_block( trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -109,12 +106,10 @@ xfs_refcountbt_alloc_block( be32_add_cpu(&agf->agf_refcount_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; return 0; out_error: - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } @@ -223,29 +218,31 @@ xfs_refcountbt_diff_two_keys( be32_to_cpu(k2->refc.rc_startblock); } -STATIC bool +STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_refcount_level) - return false; + return __this_address; } else if (level >= mp->m_refc_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); } @@ -254,25 +251,30 @@ STATIC void xfs_refcountbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_refcountbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_refcountbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } STATIC void xfs_refcountbt_write_verify( struct xfs_buf *bp) { - if (!xfs_refcountbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_refcountbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -283,6 +285,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { .name = "xfs_refcountbt", .verify_read = xfs_refcountbt_read_verify, .verify_write = xfs_refcountbt_write_verify, + .verify_struct = xfs_refcountbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 55c88a732690..79822cf6ebe3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -34,6 +34,7 @@ #include "xfs_rmap_btree.h" #include "xfs_trans_space.h" #include "xfs_trace.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_bmap.h" @@ -367,6 +368,51 @@ xfs_rmap_lookup_le_range( } /* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + +/* * Find the extent in the rmap btree and remove it. * * The record we find should always be an exact match for the extent that we're @@ -443,33 +489,40 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. + */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); - - /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Check the offset, if necessary. */ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ @@ -663,6 +716,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent @@ -2333,3 +2387,70 @@ xfs_rmap_compare( else return 0; } + +/* Is there a record covering a given extent? */ +int +xfs_rmap_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.r.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.r.rm_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} + +/* + * Is there a record for this owner completely covering a given physical + * extent? If so, *has_rmap will be set to true. If there is no record + * or the record only covers part of the range, we set *has_rmap to false. + * This function doesn't perform range lookups or offset checks, so it is + * not suitable for checking data fork blocks. + */ +int +xfs_rmap_record_exists( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool *has_rmap) +{ + uint64_t owner; + uint64_t offset; + unsigned int flags; + int has_record; + struct xfs_rmap_irec irec; + int error; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || + (flags & XFS_RMAP_BMBT_BLOCK)); + + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, + &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + error = xfs_rmap_get_rec(cur, &irec, &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && + irec.rm_startblock + irec.rm_blockcount >= bno + len); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..380e53be98d5 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. */ @@ -219,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a, union xfs_btree_rec; int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); +int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); +int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, struct xfs_owner_info *oinfo, + bool *has_rmap); #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9d9c9192584c..8b0d0de1cd11 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -104,20 +104,15 @@ xfs_rmapbt_alloc_block( int error; xfs_agblock_t bno; - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - /* Allocate the new block from the freelist. If we can't, give up. */ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, &bno, 1); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (error) return error; - } trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, bno, 1); if (bno == NULLAGBLOCK) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; } @@ -130,7 +125,8 @@ xfs_rmapbt_alloc_block( be32_add_cpu(&agf->agf_rmap_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno); + *stat = 1; return 0; } @@ -158,6 +154,8 @@ xfs_rmapbt_free_block( XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); + xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno); + return 0; } @@ -303,13 +301,14 @@ xfs_rmapbt_diff_two_keys( return 0; } -static bool +static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -325,19 +324,20 @@ xfs_rmapbt_verify( * in this case. */ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) - return false; + return __this_address; } else if (level >= mp->m_rmap_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); } @@ -346,25 +346,30 @@ static void xfs_rmapbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_rmapbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_rmapbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_rmapbt_write_verify( struct xfs_buf *bp) { - if (!xfs_rmapbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_rmapbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -375,6 +380,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { .name = "xfs_rmapbt", .verify_read = xfs_rmapbt_read_verify, .verify_write = xfs_rmapbt_write_verify, + .verify_struct = xfs_rmapbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 5d4e43ef4eea..106be2d0bb88 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -672,7 +672,6 @@ xfs_rtmodify_range( /* * Compute a mask of relevant bits. */ - bit = 0; mask = ((xfs_rtword_t)1 << lastbit) - 1; /* * Set/clear the active bits. @@ -1086,3 +1085,36 @@ xfs_rtalloc_query_all( return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv); } + +/* + * Verify that an realtime block number pointer doesn't point off the + * end of the realtime device. + */ +bool +xfs_verify_rtbno( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return rtbno < mp->m_sb.sb_rblocks; +} + +/* Is the given extent all free? */ +int +xfs_rtalloc_extent_is_free( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_rtblock_t start, + xfs_extlen_t len, + bool *is_free) +{ + xfs_rtblock_t end; + int matches; + int error; + + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + if (error) + return error; + + *is_free = matches; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 9b5aae2bcc0b..53433cc024fd 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,6 +40,8 @@ #include "xfs_rmap_btree.h" #include "xfs_bmap.h" #include "xfs_refcount_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -116,6 +118,9 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { + uint32_t agcount = 0; + uint32_t rem; + if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); return -EWRONGFS; @@ -226,6 +231,13 @@ xfs_mount_validate_sb( return -EINVAL; } + /* Compute agcount for this number of dblocks and agblocks */ + if (sbp->sb_agblocks) { + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); + if (rem) + agcount++; + } + /* * More sanity checking. Most of these were stolen directly from * xfs_repair. @@ -250,6 +262,10 @@ xfs_mount_validate_sb( sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || + agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || @@ -640,11 +656,10 @@ xfs_sb_read_verify( error = xfs_sb_verify(bp, true); out_error: - if (error) { + if (error == -EFSCORRUPTED || error == -EFSBADCRC) + xfs_verifier_error(bp, error, __this_address); + else if (error) xfs_buf_ioerror(bp, error); - if (error == -EFSCORRUPTED || error == -EFSBADCRC) - xfs_verifier_error(bp); - } } /* @@ -673,13 +688,12 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int error; error = xfs_sb_verify(bp, false); if (error) { - xfs_buf_ioerror(bp, error); - xfs_verifier_error(bp); + xfs_verifier_error(bp, error, __this_address); return; } @@ -717,7 +731,6 @@ xfs_sb_mount_common( struct xfs_sb *sbp) { mp->m_agfrotor = mp->m_agirotor = 0; - spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; @@ -876,3 +889,88 @@ xfs_sync_sb( xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } + +int +xfs_fs_geometry( + struct xfs_sb *sbp, + struct xfs_fsop_geom *geo, + int struct_version) +{ + memset(geo, 0, sizeof(struct xfs_fsop_geom)); + + geo->blocksize = sbp->sb_blocksize; + geo->rtextsize = sbp->sb_rextsize; + geo->agblocks = sbp->sb_agblocks; + geo->agcount = sbp->sb_agcount; + geo->logblocks = sbp->sb_logblocks; + geo->sectsize = sbp->sb_sectsize; + geo->inodesize = sbp->sb_inodesize; + geo->imaxpct = sbp->sb_imax_pct; + geo->datablocks = sbp->sb_dblocks; + geo->rtblocks = sbp->sb_rblocks; + geo->rtextents = sbp->sb_rextents; + geo->logstart = sbp->sb_logstart; + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); + + if (struct_version < 2) + return 0; + + geo->sunit = sbp->sb_unit; + geo->swidth = sbp->sb_width; + + if (struct_version < 3) + return 0; + + geo->version = XFS_FSOP_GEOM_VERSION; + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2; + if (xfs_sb_version_hasattr(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; + if (xfs_sb_version_hasquota(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; + if (xfs_sb_version_hasalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; + if (xfs_sb_version_hasdalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; + if (xfs_sb_version_hasextflgbit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; + if (xfs_sb_version_hassector(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; + if (xfs_sb_version_hasasciici(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; + if (xfs_sb_version_haslazysbcount(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; + if (xfs_sb_version_hasattr2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; + if (xfs_sb_version_hasprojid32bit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; + if (xfs_sb_version_hascrc(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; + if (xfs_sb_version_hasftype(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; + if (xfs_sb_version_hasfinobt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; + if (xfs_sb_version_hassparseinodes(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; + if (xfs_sb_version_hasrmapbt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; + if (xfs_sb_version_hasreflink(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hassector(sbp)) + geo->logsectsize = sbp->sb_logsectsize; + else + geo->logsectsize = BBSIZE; + geo->rtsectsize = sbp->sb_blocksize; + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); + + if (struct_version < 4) + return 0; + + if (xfs_sb_version_haslogv2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; + + geo->logsunit = sbp->sb_logsunit; + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 961e6475a309..63dcd2a1a657 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -34,4 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, + int struct_version); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c6f4eb46fe26..d0b84da0cb1e 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -76,6 +76,9 @@ struct xfs_log_item_desc { int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); int xfs_log_calc_minimum_size(struct xfs_mount *); +struct xfs_trans_res; +void xfs_log_get_max_trans_res(struct xfs_mount *mp, + struct xfs_trans_res *max_resp); /* * Values for t_flags. @@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); +xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c484877129a0..5ef5f354587e 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -98,7 +98,7 @@ xfs_symlink_hdr_ok( return true; } -static bool +static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { @@ -106,22 +106,22 @@ xfs_symlink_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) - return false; + return __this_address; if (be32_to_cpu(dsl->sl_offset) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) - return false; + return __this_address; if (dsl->sl_owner == 0) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) - return false; + return __this_address; - return true; + return NULL; } static void @@ -129,18 +129,19 @@ xfs_symlink_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_symlink_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -148,15 +149,16 @@ xfs_symlink_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_symlink_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = { .name = "xfs_symlink", .verify_read = xfs_symlink_read_verify, .verify_write = xfs_symlink_write_verify, + .verify_struct = xfs_symlink_verify, }; void @@ -207,3 +210,37 @@ xfs_symlink_local_to_remote( xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + ifp->if_bytes - 1); } + +/* Verify the consistency of an inline symlink. */ +xfs_failaddr_t +xfs_symlink_shortform_verify( + struct xfs_inode *ip) +{ + char *sfp; + char *endp; + struct xfs_ifork *ifp; + int size; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (char *)ifp->if_u1.if_data; + size = ifp->if_bytes; + endp = sfp + size; + + /* Zero length symlinks can exist while we're deleting a remote one. */ + if (size == 0) + return NULL; + + /* No negative sizes or overly long symlink targets. */ + if (size < 0 || size > XFS_SYMLINK_MAXLEN) + return __this_address; + + /* No NULLs in the target either. */ + if (memchr(sfp, 0, size - 1)) + return __this_address; + + /* We /did/ null-terminate the buffer, right? */ + if (*endp != 0) + return __this_address; + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e2..5f17641f040f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -34,6 +34,9 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" +#define _ALLOC true +#define _FREE false + /* * A buffer has a format structure overhead in the log in addition * to the data, so we need to take this into account when reserving @@ -132,43 +135,77 @@ xfs_calc_inode_res( } /* - * The free inode btree is a conditional feature and the log reservation - * requirements differ slightly from that of the traditional inode allocation - * btree. The finobt tracks records for inode chunks with at least one free - * inode. A record can be removed from the tree for an inode allocation - * or free and thus the finobt reservation is unconditional across: + * Inode btree record insertion/removal modifies the inode btree and free space + * btrees (since the inobt does not use the agfl). This requires the following + * reservation: * - * - inode allocation - * - inode free - * - inode chunk allocation + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size * - * The 'modify' param indicates to include the record modification scenario. The - * 'alloc' param indicates to include the reservation for free space btree - * modifications on behalf of finobt modifications. This is required only for - * transactions that do not already account for free space btree modifications. + * The caller must account for SB and AG header modifications, etc. + */ +STATIC uint +xfs_calc_inobt_res( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * The free inode btree is a conditional feature. The behavior differs slightly + * from that of the traditional inode btree in that the finobt tracks records + * for inode chunks with at least one free inode. A record can be removed from + * the tree during individual inode allocation. Therefore the finobt + * reservation is unconditional for both the inode chunk allocation and + * individual inode allocation (modify) cases. * - * the free inode btree: max depth * block size - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the free inode btree entry: block size + * Behavior aside, the reservation for finobt modification is equivalent to the + * traditional inobt: cover a full finobt shape change plus block allocation. */ STATIC uint xfs_calc_finobt_res( - struct xfs_mount *mp, - int alloc, - int modify) + struct xfs_mount *mp) { - uint res; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); - if (alloc) - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); - if (modify) - res += (uint)XFS_FSB_TO_B(mp, 1); + return xfs_calc_inobt_res(mp); +} +/* + * Calculate the reservation required to allocate or free an inode chunk. This + * includes: + * + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk: m_ialloc_blks * N + * + * The size N of the inode chunk reservation depends on whether it is for + * allocation or free and which type of create transaction is in use. An inode + * chunk free always invalidates the buffers and only requires reservation for + * headers (N == 0). An inode chunk allocation requires a chunk sized + * reservation on v4 and older superblocks to initialize the chunk. No chunk + * reservation is required for allocation on v5 supers, which use ordered + * buffers to initialize. + */ +STATIC uint +xfs_calc_inode_chunk_res( + struct xfs_mount *mp, + bool alloc) +{ + uint res, size = 0; + + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (alloc) { + /* icreate tx uses ordered buffers */ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return res; + size = XFS_FSB_TO_B(mp, 1); + } + + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); return res; } @@ -232,8 +269,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* @@ -282,13 +312,14 @@ xfs_calc_rename_reservation( * For removing an inode from unlinked list at first, we can modify: * the agi hash list and counters: sector size * the on disk inode before ours in the agi hash list: inode cluster size + * the on disk inode in the agi hash list: inode cluster size */ STATIC uint xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -320,13 +351,13 @@ xfs_calc_link_reservation( /* * For adding an inode to unlinked list we can modify: * the agi hash list: sector size - * the unlinked inode: inode size + * the on disk inode: inode cluster size */ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 1); + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -379,45 +410,16 @@ xfs_calc_create_resv_modify( xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 1, 1); -} - -/* - * For create we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -STATIC uint -xfs_calc_create_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -STATIC uint -__xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_create_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); + xfs_calc_finobt_res(mp); } /* * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the finobt (record insertion) + * the inode chunk (allocation, optional init) + * the inobt (record insertion) + * the finobt (optional, record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 0); + xfs_calc_inode_chunk_res(mp, _ALLOC) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } STATIC uint @@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp) } STATIC uint -xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return xfs_calc_icreate_reservation(mp); - return __xfs_calc_create_reservation(mp); - -} - -STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { uint res = XFS_DQUOT_LOGRES(mp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - res += xfs_calc_icreate_resv_alloc(mp); - else - res += xfs_calc_create_resv_alloc(mp); - + res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); } @@ -470,7 +457,7 @@ STATIC uint xfs_calc_mkdir_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp); + return xfs_calc_icreate_reservation(mp); } @@ -483,20 +470,24 @@ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp) + + return xfs_calc_icreate_reservation(mp) + xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); } /* * In freeing an inode we can modify: * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the super block free inode counter, AGF and AGFL: sector size + * the on disk inode (agi unlinked list removal) + * the inode chunk (invalidated, headers only) + * the inode btree * the finobt (record insertion, removal or modification) + * + * Note that the inode chunk res. includes an allocfree res. for freeing of the + * inode chunk. This is technically extraneous because the inode chunk free is + * deferred (it occurs after a transaction roll). Include the extra reservation + * anyways since we've had reports of ifree transaction overruns due to too many + * agfl fixups during inode chunk frees. */ STATIC uint xfs_calc_ifree_reservation( @@ -504,15 +495,11 @@ xfs_calc_ifree_reservation( { return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 1); + xfs_calc_inode_chunk_res(mp, _FREE) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } /* @@ -842,7 +829,7 @@ xfs_trans_resv_calc( resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 0220159bd463..3c560695c546 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -48,6 +48,12 @@ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ typedef int64_t xfs_sfiloff_t; /* signed block number in a file */ /* + * New verifiers will return the instruction address of the failing check. + * NULL means everything is ok. + */ +typedef void * xfs_failaddr_t; + +/* * Null values for the types. */ #define NULLFSBLOCK ((xfs_fsblock_t)-1) @@ -136,5 +142,21 @@ typedef uint32_t xfs_dqid_t; #define XFS_NBWORD (1 << XFS_NBWORDLOG) #define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) +struct xfs_iext_cursor { + struct xfs_iext_leaf *leaf; + int pos; +}; + +typedef enum { + XFS_EXT_NORM, XFS_EXT_UNWRITTEN, +} xfs_exntst_t; + +typedef struct xfs_bmbt_irec +{ + xfs_fileoff_t br_startoff; /* starting file offset */ + xfs_fsblock_t br_startblock; /* starting block number */ + xfs_filblks_t br_blockcount; /* number of blocks */ + xfs_exntst_t br_state; /* extent state */ +} xfs_bmbt_irec_t; #endif /* __XFS_TYPES_H__ */ |