diff options
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r-- | fs/xfs/scrub/agheader.c | 967 | ||||
-rw-r--r-- | fs/xfs/scrub/alloc.c | 183 | ||||
-rw-r--r-- | fs/xfs/scrub/attr.c | 471 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap.c | 734 | ||||
-rw-r--r-- | fs/xfs/scrub/btree.c | 676 | ||||
-rw-r--r-- | fs/xfs/scrub/btree.h | 66 | ||||
-rw-r--r-- | fs/xfs/scrub/common.c | 775 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 160 | ||||
-rw-r--r-- | fs/xfs/scrub/dabtree.c | 613 | ||||
-rw-r--r-- | fs/xfs/scrub/dabtree.h | 59 | ||||
-rw-r--r-- | fs/xfs/scrub/dir.c | 842 | ||||
-rw-r--r-- | fs/xfs/scrub/ialloc.c | 528 | ||||
-rw-r--r-- | fs/xfs/scrub/inode.c | 611 | ||||
-rw-r--r-- | fs/xfs/scrub/parent.c | 327 | ||||
-rw-r--r-- | fs/xfs/scrub/quota.c | 297 | ||||
-rw-r--r-- | fs/xfs/scrub/refcount.c | 515 | ||||
-rw-r--r-- | fs/xfs/scrub/rmap.c | 261 | ||||
-rw-r--r-- | fs/xfs/scrub/rtbitmap.c | 122 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 462 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 152 | ||||
-rw-r--r-- | fs/xfs/scrub/symlink.c | 92 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.c | 58 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 500 | ||||
-rw-r--r-- | fs/xfs/scrub/xfs_scrub.h | 29 |
24 files changed, 9500 insertions, 0 deletions
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c new file mode 100644 index 000000000000..018aabbd9394 --- /dev/null +++ b/fs/xfs/scrub/agheader.c @@ -0,0 +1,967 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_rmap.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" + +/* + * Walk all the blocks in the AGFL. The fn function can return any negative + * error code or XFS_BTREE_QUERY_RANGE_ABORT. + */ +int +xfs_scrub_walk_agfl( + struct xfs_scrub_context *sc, + int (*fn)(struct xfs_scrub_context *, + xfs_agblock_t bno, void *), + void *priv) +{ + struct xfs_agf *agf; + __be32 *agfl_bno; + struct xfs_mount *mp = sc->mp; + unsigned int flfirst; + unsigned int fllast; + int i; + int error; + + agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp); + flfirst = be32_to_cpu(agf->agf_flfirst); + fllast = be32_to_cpu(agf->agf_fllast); + + /* Nothing to walk in an empty AGFL. */ + if (agf->agf_flcount == cpu_to_be32(0)) + return 0; + + /* first to last is a consecutive list. */ + if (fllast >= flfirst) { + for (i = flfirst; i <= fllast; i++) { + error = fn(sc, be32_to_cpu(agfl_bno[i]), priv); + if (error) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return error; + } + + return 0; + } + + /* first to the end */ + for (i = flfirst; i < xfs_agfl_size(mp); i++) { + error = fn(sc, be32_to_cpu(agfl_bno[i]), priv); + if (error) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return error; + } + + /* the start to last. */ + for (i = 0; i <= fllast; i++) { + error = fn(sc, be32_to_cpu(agfl_bno[i]), priv); + if (error) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return error; + } + + return 0; +} + +/* Superblock */ + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_superblock_xref( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sm->sm_agno; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_SB_BLOCK(mp); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ +} + +/* + * Scrub the filesystem superblock. + * + * Note: We do /not/ attempt to check AG 0's superblock. Mount is + * responsible for validating all the geometry information in sb 0, so + * if the filesystem is capable of initiating online scrub, then clearly + * sb 0 is ok and we can use its information to check everything else. + */ +int +xfs_scrub_superblock( + struct xfs_scrub_context *sc) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + struct xfs_dsb *sb; + xfs_agnumber_t agno; + uint32_t v2_ok; + __be32 features_mask; + int error; + __be16 vernum_mask; + + agno = sc->sm->sm_agno; + if (agno == 0) + return 0; + + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); + /* + * The superblock verifier can return several different error codes + * if it thinks the superblock doesn't look right. For a mount these + * would all get bounced back to userspace, but if we're here then the + * fs mounted successfully, which means that this secondary superblock + * is simply incorrect. Treat all these codes the same way we treat + * any corruption. + */ + switch (error) { + case -EINVAL: /* also -EWRONGFS */ + case -ENOSYS: + case -EFBIG: + error = -EFSCORRUPTED; + default: + break; + } + if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) + return error; + + sb = XFS_BUF_TO_SBP(bp); + + /* + * Verify the geometries match. Fields that are permanently + * set by mkfs are checked; fields that can be updated later + * (and are not propagated to backup superblocks) are preen + * checked. + */ + if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check sb_versionnum bits that are set at mkfs time. */ + vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS | + XFS_SB_VERSION_NUMBITS | + XFS_SB_VERSION_ALIGNBIT | + XFS_SB_VERSION_DALIGNBIT | + XFS_SB_VERSION_SHAREDBIT | + XFS_SB_VERSION_LOGV2BIT | + XFS_SB_VERSION_SECTORBIT | + XFS_SB_VERSION_EXTFLGBIT | + XFS_SB_VERSION_DIRV2BIT); + if ((sb->sb_versionnum & vernum_mask) != + (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check sb_versionnum bits that can be set after mkfs time. */ + vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT | + XFS_SB_VERSION_NLINKBIT | + XFS_SB_VERSION_QUOTABIT); + if ((sb->sb_versionnum & vernum_mask) != + (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname))) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_blocklog != mp->m_sb.sb_blocklog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_sectlog != mp->m_sb.sb_sectlog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_inodelog != mp->m_sb.sb_inodelog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_inopblog != mp->m_sb.sb_inopblog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_agblklog != mp->m_sb.sb_agblklog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_rextslog != mp->m_sb.sb_rextslog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct) + xfs_scrub_block_set_preen(sc, bp); + + /* + * Skip the summary counters since we track them in memory anyway. + * sb_icount, sb_ifree, sb_fdblocks, sb_frexents + */ + + if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino)) + xfs_scrub_block_set_preen(sc, bp); + + /* + * Skip the quota flags since repair will force quotacheck. + * sb_qflags + */ + + if (sb->sb_flags != mp->m_sb.sb_flags) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width)) + xfs_scrub_block_set_preen(sc, bp); + + if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Do we see any invalid bits in sb_features2? */ + if (!xfs_sb_version_hasmorebits(&mp->m_sb)) { + if (sb->sb_features2 != 0) + xfs_scrub_block_set_corrupt(sc, bp); + } else { + v2_ok = XFS_SB_VERSION2_OKBITS; + if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5) + v2_ok |= XFS_SB_VERSION2_CRCBIT; + + if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok))) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_features2 != sb->sb_bad_features2) + xfs_scrub_block_set_preen(sc, bp); + } + + /* Check sb_features2 flags that are set at mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_VERSION2_LAZYSBCOUNTBIT | + XFS_SB_VERSION2_PROJID32BIT | + XFS_SB_VERSION2_CRCBIT | + XFS_SB_VERSION2_FTYPE); + if ((sb->sb_features2 & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features2) & features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check sb_features2 flags that can be set after mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT); + if ((sb->sb_features2 & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features2) & features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (!xfs_sb_version_hascrc(&mp->m_sb)) { + /* all v5 fields must be zero */ + if (memchr_inv(&sb->sb_features_compat, 0, + sizeof(struct xfs_dsb) - + offsetof(struct xfs_dsb, sb_features_compat))) + xfs_scrub_block_set_corrupt(sc, bp); + } else { + /* Check compat flags; all are set at mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN); + if ((sb->sb_features_compat & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check ro compat flags; all are set at mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN | + XFS_SB_FEAT_RO_COMPAT_FINOBT | + XFS_SB_FEAT_RO_COMPAT_RMAPBT | + XFS_SB_FEAT_RO_COMPAT_REFLINK); + if ((sb->sb_features_ro_compat & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features_ro_compat) & + features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check incompat flags; all are set at mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN | + XFS_SB_FEAT_INCOMPAT_FTYPE | + XFS_SB_FEAT_INCOMPAT_SPINODES | + XFS_SB_FEAT_INCOMPAT_META_UUID); + if ((sb->sb_features_incompat & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features_incompat) & + features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Check log incompat flags; all are set at mkfs time. */ + features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN); + if ((sb->sb_features_log_incompat & features_mask) != + (cpu_to_be32(mp->m_sb.sb_features_log_incompat) & + features_mask)) + xfs_scrub_block_set_corrupt(sc, bp); + + /* Don't care about sb_crc */ + + if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align)) + xfs_scrub_block_set_corrupt(sc, bp); + + if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino)) + xfs_scrub_block_set_preen(sc, bp); + + /* Don't care about sb_lsn */ + } + + if (xfs_sb_version_hasmetauuid(&mp->m_sb)) { + /* The metadata UUID must be the same for all supers */ + if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid)) + xfs_scrub_block_set_corrupt(sc, bp); + } + + /* Everything else must be zero. */ + if (memchr_inv(sb + 1, 0, + BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) + xfs_scrub_block_set_corrupt(sc, bp); + + xfs_scrub_superblock_xref(sc, bp); + + return error; +} + +/* AGF */ + +/* Tally freespace record lengths. */ +STATIC int +xfs_scrub_agf_record_bno_lengths( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + xfs_extlen_t *blocks = priv; + + (*blocks) += rec->ar_blockcount; + return 0; +} + +/* Check agf_freeblks */ +static inline void +xfs_scrub_agf_xref_freeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_extlen_t blocks = 0; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_query_all(sc->sa.bno_cur, + xfs_scrub_agf_record_bno_lengths, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_freeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Cross reference the AGF with the cntbt (freespace by length btree) */ +static inline void +xfs_scrub_agf_xref_cntbt( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t agbno; + xfs_extlen_t blocks; + int have; + int error; + + if (!sc->sa.cnt_cur) + return; + + /* Any freespace at all? */ + error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have) { + if (agf->agf_freeblks != be32_to_cpu(0)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + return; + } + + /* Check agf_longest */ + error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have || blocks != be32_to_cpu(agf->agf_longest)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Check the btree block counts in the AGF against the btrees. */ +STATIC void +xfs_scrub_agf_xref_btreeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t blocks; + xfs_agblock_t btreeblks; + int error; + + /* Check agf_rmap_blocks; set up for agf_btreeblks check */ + if (sc->sa.rmap_cur) { + error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + btreeblks = blocks - 1; + if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + } else { + btreeblks = 0; + } + + /* + * No rmap cursor; we can't xref if we have the rmapbt feature. + * We also can't do it if we're missing the free space btree cursors. + */ + if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || + !sc->sa.bno_cur || !sc->sa.cnt_cur) + return; + + /* Check agf_btreeblks */ + error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + btreeblks += blocks - 1; + + error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + btreeblks += blocks - 1; + + if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Check agf_refcount_blocks against tree size */ +static inline void +xfs_scrub_agf_xref_refcblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t blocks; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agf_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGF_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_agf_xref_freeblks(sc); + xfs_scrub_agf_xref_cntbt(sc); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_agf_xref_btreeblks(sc); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_agf_xref_refcblks(sc); + + /* scrub teardown will take care of sc->sa for us */ +} + +/* Scrub the AGF. */ +int +xfs_scrub_agf( + struct xfs_scrub_context *sc) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_agf *agf; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_agblock_t eoag; + xfs_agblock_t agfl_first; + xfs_agblock_t agfl_last; + xfs_agblock_t agfl_count; + xfs_agblock_t fl_count; + int level; + int error = 0; + + agno = sc->sa.agno = sc->sm->sm_agno; + error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + &sc->sa.agf_bp, &sc->sa.agfl_bp); + if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) + goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); + + agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + + /* Check the AG length */ + eoag = be32_to_cpu(agf->agf_length); + if (eoag != xfs_ag_block_count(mp, agno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + /* Check the AGF btree roots and levels */ + agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + } + + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + agbno = be32_to_cpu(agf->agf_refcount_root); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + level = be32_to_cpu(agf->agf_refcount_level); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + } + + /* Check the AGFL counters */ + agfl_first = be32_to_cpu(agf->agf_flfirst); + agfl_last = be32_to_cpu(agf->agf_fllast); + agfl_count = be32_to_cpu(agf->agf_flcount); + if (agfl_last > agfl_first) + fl_count = agfl_last - agfl_first + 1; + else + fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1; + if (agfl_count != 0 && fl_count != agfl_count) + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + + xfs_scrub_agf_xref(sc); +out: + return error; +} + +/* AGFL */ + +struct xfs_scrub_agfl_info { + struct xfs_owner_info oinfo; + unsigned int sz_entries; + unsigned int nr_entries; + xfs_agblock_t *entries; +}; + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agfl_block_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + struct xfs_owner_info *oinfo) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); +} + +/* Scrub an AGFL block. */ +STATIC int +xfs_scrub_agfl_block( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + void *priv) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_scrub_agfl_info *sai = priv; + xfs_agnumber_t agno = sc->sa.agno; + + if (xfs_verify_agbno(mp, agno, agbno) && + sai->nr_entries < sai->sz_entries) + sai->entries[sai->nr_entries++] = agbno; + else + xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); + + xfs_scrub_agfl_block_xref(sc, agbno, priv); + + return 0; +} + +static int +xfs_scrub_agblock_cmp( + const void *pa, + const void *pb) +{ + const xfs_agblock_t *a = pa; + const xfs_agblock_t *b = pb; + + return (int)*a - (int)*b; +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agfl_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGFL_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* + * Scrub teardown will take care of sc->sa for us. Leave sc->sa + * active so that the agfl block xref can use it too. + */ +} + +/* Scrub the AGFL. */ +int +xfs_scrub_agfl( + struct xfs_scrub_context *sc) +{ + struct xfs_scrub_agfl_info sai; + struct xfs_agf *agf; + xfs_agnumber_t agno; + unsigned int agflcount; + unsigned int i; + int error; + + agno = sc->sa.agno = sc->sm->sm_agno; + error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + &sc->sa.agf_bp, &sc->sa.agfl_bp); + if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) + goto out; + if (!sc->sa.agf_bp) + return -EFSCORRUPTED; + xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); + + xfs_scrub_agfl_xref(sc); + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Allocate buffer to ensure uniqueness of AGFL entries. */ + agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + agflcount = be32_to_cpu(agf->agf_flcount); + if (agflcount > xfs_agfl_size(sc->mp)) { + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + goto out; + } + memset(&sai, 0, sizeof(sai)); + sai.sz_entries = agflcount; + sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS); + if (!sai.entries) { + error = -ENOMEM; + goto out; + } + + /* Check the blocks in the AGFL. */ + xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); + error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); + if (error) + goto out_free; + + if (agflcount != sai.nr_entries) { + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + goto out_free; + } + + /* Sort entries, check for duplicates. */ + sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]), + xfs_scrub_agblock_cmp, NULL); + for (i = 1; i < sai.nr_entries; i++) { + if (sai.entries[i] == sai.entries[i - 1]) { + xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + break; + } + } + +out_free: + kmem_free(sai.entries); +out: + return error; +} + +/* AGI */ + +/* Check agi_count/agi_freecount */ +static inline void +xfs_scrub_agi_xref_icounts( + struct xfs_scrub_context *sc) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + xfs_agino_t icount; + xfs_agino_t freecount; + int error; + + if (!sc->sa.ino_cur) + return; + + error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + return; + if (be32_to_cpu(agi->agi_count) != icount || + be32_to_cpu(agi->agi_freecount) != freecount) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agi_xref( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agbno = XFS_AGI_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_agi_xref_icounts(sc); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ +} + +/* Scrub the AGI. */ +int +xfs_scrub_agi( + struct xfs_scrub_context *sc) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_agi *agi; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_agblock_t eoag; + xfs_agino_t agino; + xfs_agino_t first_agino; + xfs_agino_t last_agino; + xfs_agino_t icount; + int i; + int level; + int error = 0; + + agno = sc->sa.agno = sc->sm->sm_agno; + error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + &sc->sa.agf_bp, &sc->sa.agfl_bp); + if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) + goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); + + agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + + /* Check the AG length */ + eoag = be32_to_cpu(agi->agi_length); + if (eoag != xfs_ag_block_count(mp, agno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + /* Check btree roots and levels */ + agbno = be32_to_cpu(agi->agi_root); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + level = be32_to_cpu(agi->agi_level); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + agbno = be32_to_cpu(agi->agi_free_root); + if (!xfs_verify_agbno(mp, agno, agbno)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + level = be32_to_cpu(agi->agi_free_level); + if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + } + + /* Check inode counters */ + xfs_ialloc_agino_range(mp, agno, &first_agino, &last_agino); + icount = be32_to_cpu(agi->agi_count); + if (icount > last_agino - first_agino + 1 || + icount < be32_to_cpu(agi->agi_freecount)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + /* Check inode pointers */ + agino = be32_to_cpu(agi->agi_newino); + if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + agino = be32_to_cpu(agi->agi_dirino); + if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + /* Check unlinked inode buckets */ + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { + agino = be32_to_cpu(agi->agi_unlinked[i]); + if (agino == NULLAGINO) + continue; + if (!xfs_verify_agino(mp, agno, agino)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + } + + if (agi->agi_pad32 != cpu_to_be32(0)) + xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + + xfs_scrub_agi_xref(sc); +out: + return error; +} diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c new file mode 100644 index 000000000000..517c079d3f68 --- /dev/null +++ b/fs/xfs/scrub/alloc.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_alloc.h" +#include "xfs_rmap.h" +#include "xfs_alloc.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* + * Set us up to scrub free space btrees. + */ +int +xfs_scrub_setup_ag_allocbt( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_ag_btree(sc, ip, false); +} + +/* Free space btree scrubber. */ +/* + * Ensure there's a corresponding cntbt/bnobt record matching this + * bnobt/cntbt record, respectively. + */ +STATIC void +xfs_scrub_allocbt_xref_other( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_btree_cur **pcur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int has_otherrec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) + pcur = &sc->sa.cnt_cur; + else + pcur = &sc->sa.bno_cur; + if (!*pcur) + return; + + error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + if (fbno != agbno || flen != len) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_allocbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_allocbt_xref_other(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_xref_has_no_owner(sc, agbno, len); + xfs_scrub_xref_is_not_shared(sc, agbno, len); +} + +/* Scrub a bnobt/cntbt record. */ +STATIC int +xfs_scrub_allocbt_rec( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec) +{ + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agblock_t bno; + xfs_extlen_t len; + int error = 0; + + bno = be32_to_cpu(rec->alloc.ar_startblock); + len = be32_to_cpu(rec->alloc.ar_blockcount); + + if (bno + len <= bno || + !xfs_verify_agbno(mp, agno, bno) || + !xfs_verify_agbno(mp, agno, bno + len - 1)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + xfs_scrub_allocbt_xref(bs->sc, bno, len); + + return error; +} + +/* Scrub the freespace btrees for some AG. */ +STATIC int +xfs_scrub_allocbt( + struct xfs_scrub_context *sc, + xfs_btnum_t which) +{ + struct xfs_owner_info oinfo; + struct xfs_btree_cur *cur; + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur; + return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL); +} + +int +xfs_scrub_bnobt( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO); +} + +int +xfs_scrub_cntbt( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); +} + +/* xref check that the extent is not free */ +void +xfs_scrub_xref_is_used_space( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool is_freesp; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (is_freesp) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); +} diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c new file mode 100644 index 000000000000..127575f0abfb --- /dev/null +++ b/fs/xfs/scrub/attr.c @@ -0,0 +1,471 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/dabtree.h" +#include "scrub/trace.h" + +#include <linux/posix_acl_xattr.h> +#include <linux/xattr.h> + +/* Set us up to scrub an inode's extended attributes. */ +int +xfs_scrub_setup_xattr( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + size_t sz; + + /* + * Allocate the buffer without the inode lock held. We need enough + * space to read every xattr value in the file or enough space to + * hold three copies of the xattr free space bitmap. (Not both at + * the same time.) + */ + sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); + sc->buf = kmem_zalloc_large(sz, KM_SLEEP); + if (!sc->buf) + return -ENOMEM; + + return xfs_scrub_setup_inode_contents(sc, ip, 0); +} + +/* Extended Attributes */ + +struct xfs_scrub_xattr { + struct xfs_attr_list_context context; + struct xfs_scrub_context *sc; +}; + +/* + * Check that an extended attribute key can be looked up by hash. + * + * We use the XFS attribute list iterator (i.e. xfs_attr_list_int_ilocked) + * to call this function for every attribute key in an inode. Once + * we're here, we load the attribute value to see if any errors happen, + * or if we get more or less data than we expected. + */ +static void +xfs_scrub_xattr_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen) +{ + struct xfs_scrub_xattr *sx; + struct xfs_da_args args = { NULL }; + int error = 0; + + sx = container_of(context, struct xfs_scrub_xattr, context); + + if (flags & XFS_ATTR_INCOMPLETE) { + /* Incomplete attr key, just mark the inode for preening. */ + xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino); + return; + } + + args.flags = ATTR_KERNOTIME; + if (flags & XFS_ATTR_ROOT) + args.flags |= ATTR_ROOT; + else if (flags & XFS_ATTR_SECURE) + args.flags |= ATTR_SECURE; + args.geo = context->dp->i_mount->m_attr_geo; + args.whichfork = XFS_ATTR_FORK; + args.dp = context->dp; + args.name = name; + args.namelen = namelen; + args.hashval = xfs_da_hashname(args.name, args.namelen); + args.trans = context->tp; + args.value = sx->sc->buf; + args.valuelen = XATTR_SIZE_MAX; + + error = xfs_attr_get_ilocked(context->dp, &args); + if (error == -EEXIST) + error = 0; + if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, + &error)) + goto fail_xref; + if (args.valuelen != valuelen) + xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, + args.blkno); + +fail_xref: + return; +} + +/* + * Mark a range [start, start+len) in this map. Returns true if the + * region was free, and false if there's a conflict or a problem. + * + * Within a char, the lowest bit of the char represents the byte with + * the smallest address + */ +STATIC bool +xfs_scrub_xattr_set_map( + struct xfs_scrub_context *sc, + unsigned long *map, + unsigned int start, + unsigned int len) +{ + unsigned int mapsize = sc->mp->m_attr_geo->blksize; + bool ret = true; + + if (start >= mapsize) + return false; + if (start + len > mapsize) { + len = mapsize - start; + ret = false; + } + + if (find_next_bit(map, mapsize, start) < start + len) + ret = false; + bitmap_set(map, start, len); + + return ret; +} + +/* + * Check the leaf freemap from the usage bitmap. Returns false if the + * attr freemap has problems or points to used space. + */ +STATIC bool +xfs_scrub_xattr_check_freemap( + struct xfs_scrub_context *sc, + unsigned long *map, + struct xfs_attr3_icleaf_hdr *leafhdr) +{ + unsigned long *freemap; + unsigned long *dstmap; + unsigned int mapsize = sc->mp->m_attr_geo->blksize; + int i; + + /* Construct bitmap of freemap contents. */ + freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); + bitmap_zero(freemap, mapsize); + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (!xfs_scrub_xattr_set_map(sc, freemap, + leafhdr->freemap[i].base, + leafhdr->freemap[i].size)) + return false; + } + + /* Look for bits that are set in freemap and are marked in use. */ + dstmap = freemap + BITS_TO_LONGS(mapsize); + return bitmap_and(dstmap, freemap, map, mapsize) == 0; +} + +/* + * Check this leaf entry's relations to everything else. + * Returns the number of bytes used for the name/value data. + */ +STATIC void +xfs_scrub_xattr_entry( + struct xfs_scrub_da_btree *ds, + int level, + char *buf_end, + struct xfs_attr_leafblock *leaf, + struct xfs_attr3_icleaf_hdr *leafhdr, + unsigned long *usedmap, + struct xfs_attr_leaf_entry *ent, + int idx, + unsigned int *usedbytes, + __u32 *last_hashval) +{ + struct xfs_mount *mp = ds->state->mp; + char *name_end; + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + unsigned int nameidx; + unsigned int namesize; + + if (ent->pad2 != 0) + xfs_scrub_da_set_corrupt(ds, level); + + /* Hash values in order? */ + if (be32_to_cpu(ent->hashval) < *last_hashval) + xfs_scrub_da_set_corrupt(ds, level); + *last_hashval = be32_to_cpu(ent->hashval); + + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr->firstused || + nameidx >= mp->m_attr_geo->blksize) { + xfs_scrub_da_set_corrupt(ds, level); + return; + } + + /* Check the name information. */ + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, idx); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, + be16_to_cpu(lentry->valuelen)); + name_end = (char *)lentry + namesize; + if (lentry->namelen == 0) + xfs_scrub_da_set_corrupt(ds, level); + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, idx); + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + name_end = (char *)rentry + namesize; + if (rentry->namelen == 0 || rentry->valueblk == 0) + xfs_scrub_da_set_corrupt(ds, level); + } + if (name_end > buf_end) + xfs_scrub_da_set_corrupt(ds, level); + + if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize)) + xfs_scrub_da_set_corrupt(ds, level); + if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + *usedbytes += namesize; +} + +/* Scrub an attribute leaf. */ +STATIC int +xfs_scrub_xattr_block( + struct xfs_scrub_da_btree *ds, + int level) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xfs_mount *mp = ds->state->mp; + struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; + struct xfs_buf *bp = blk->bp; + xfs_dablk_t *last_checked = ds->private; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_attr_leaf_entry *ent; + struct xfs_attr_leaf_entry *entries; + unsigned long *usedmap = ds->sc->buf; + char *buf_end; + size_t off; + __u32 last_hashval = 0; + unsigned int usedbytes = 0; + unsigned int hdrsize; + int i; + + if (*last_checked == blk->blkno) + return 0; + *last_checked = blk->blkno; + bitmap_zero(usedmap, mp->m_attr_geo->blksize); + + /* Check all the padding. */ + if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) { + struct xfs_attr3_leafblock *leaf = bp->b_addr; + + if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 || + leaf->hdr.info.hdr.pad != 0) + xfs_scrub_da_set_corrupt(ds, level); + } else { + if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0) + xfs_scrub_da_set_corrupt(ds, level); + } + + /* Check the leaf header */ + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + hdrsize = xfs_attr3_leaf_hdr_size(leaf); + + if (leafhdr.usedbytes > mp->m_attr_geo->blksize) + xfs_scrub_da_set_corrupt(ds, level); + if (leafhdr.firstused > mp->m_attr_geo->blksize) + xfs_scrub_da_set_corrupt(ds, level); + if (leafhdr.firstused < hdrsize) + xfs_scrub_da_set_corrupt(ds, level); + if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize)) + xfs_scrub_da_set_corrupt(ds, level); + + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + entries = xfs_attr3_leaf_entryp(leaf); + if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) + xfs_scrub_da_set_corrupt(ds, level); + + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { + /* Mark the leaf entry itself. */ + off = (char *)ent - (char *)leaf; + if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off, + sizeof(xfs_attr_leaf_entry_t))) { + xfs_scrub_da_set_corrupt(ds, level); + goto out; + } + + /* Check the entry and nameval. */ + xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr, + usedmap, ent, i, &usedbytes, &last_hashval); + + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + } + + if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr)) + xfs_scrub_da_set_corrupt(ds, level); + + if (leafhdr.usedbytes != usedbytes) + xfs_scrub_da_set_corrupt(ds, level); + +out: + return 0; +} + +/* Scrub a attribute btree record. */ +STATIC int +xfs_scrub_xattr_rec( + struct xfs_scrub_da_btree *ds, + int level, + void *rec) +{ + struct xfs_mount *mp = ds->state->mp; + struct xfs_attr_leaf_entry *ent = rec; + struct xfs_da_state_blk *blk; + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + struct xfs_buf *bp; + xfs_dahash_t calc_hash; + xfs_dahash_t hash; + int nameidx; + int hdrsize; + unsigned int badflags; + int error; + + blk = &ds->state->path.blk[level]; + + /* Check the whole block, if necessary. */ + error = xfs_scrub_xattr_block(ds, level); + if (error) + goto out; + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Check the hash of the entry. */ + error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval); + if (error) + goto out; + + /* Find the attr entry's location. */ + bp = blk->bp; + hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr); + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) { + xfs_scrub_da_set_corrupt(ds, level); + goto out; + } + + /* Retrieve the entry and check it. */ + hash = be32_to_cpu(ent->hashval); + badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE | + XFS_ATTR_INCOMPLETE); + if ((ent->flags & badflags) != 0) + xfs_scrub_da_set_corrupt(ds, level); + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = (struct xfs_attr_leaf_name_local *) + (((char *)bp->b_addr) + nameidx); + if (lentry->namelen <= 0) { + xfs_scrub_da_set_corrupt(ds, level); + goto out; + } + calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen); + } else { + rentry = (struct xfs_attr_leaf_name_remote *) + (((char *)bp->b_addr) + nameidx); + if (rentry->namelen <= 0) { + xfs_scrub_da_set_corrupt(ds, level); + goto out; + } + calc_hash = xfs_da_hashname(rentry->name, rentry->namelen); + } + if (calc_hash != hash) + xfs_scrub_da_set_corrupt(ds, level); + +out: + return error; +} + +/* Scrub the extended attribute metadata. */ +int +xfs_scrub_xattr( + struct xfs_scrub_context *sc) +{ + struct xfs_scrub_xattr sx; + struct attrlist_cursor_kern cursor = { 0 }; + xfs_dablk_t last_checked = -1U; + int error = 0; + + if (!xfs_inode_hasattr(sc->ip)) + return -ENOENT; + + memset(&sx, 0, sizeof(sx)); + /* Check attribute tree structure */ + error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec, + &last_checked); + if (error) + goto out; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Check that every attr key can also be looked up by hash. */ + sx.context.dp = sc->ip; + sx.context.cursor = &cursor; + sx.context.resynch = 1; + sx.context.put_listent = xfs_scrub_xattr_listent; + sx.context.tp = sc->tp; + sx.context.flags = ATTR_INCOMPLETE; + sx.sc = sc; + + /* + * Look up every xattr in this file by name. + * + * Use the backend implementation of xfs_attr_list to call + * xfs_scrub_xattr_listent on every attribute key in this inode. + * In other words, we use the same iterator/callback mechanism + * that listattr uses to scrub extended attributes, though in our + * _listent function, we check the value of the attribute. + * + * The VFS only locks i_rwsem when modifying attrs, so keep all + * three locks held because that's the only way to ensure we're + * the only thread poking into the da btree. We traverse the da + * btree while holding a leaf buffer locked for the xattr name + * iteration, which doesn't really follow the usual buffer + * locking order. + */ + error = xfs_attr_list_int_ilocked(&sx.context); + if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) + goto out; +out: + return error; +} diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c new file mode 100644 index 000000000000..639d14b51e90 --- /dev/null +++ b/fs/xfs/scrub/bmap.c @@ -0,0 +1,734 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* Set us up with an inode's bmap. */ +int +xfs_scrub_setup_inode_bmap( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = sc->mp; + int error; + + error = xfs_scrub_get_inode(sc, ip); + if (error) + goto out; + + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + xfs_ilock(sc->ip, sc->ilock_flags); + + /* + * We don't want any ephemeral data fork updates sitting around + * while we inspect block mappings, so wait for directio to finish + * and flush dirty data if we have delalloc reservations. + */ + if (S_ISREG(VFS_I(sc->ip)->i_mode) && + sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { + inode_dio_wait(VFS_I(sc->ip)); + error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping); + if (error) + goto out; + } + + /* Got the inode, lock it and we're ready to go. */ + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); + if (error) + goto out; + sc->ilock_flags |= XFS_ILOCK_EXCL; + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); + +out: + /* scrub teardown will unlock and release the inode */ + return error; +} + +/* + * Inode fork block mapping (BMBT) scrubber. + * More complex than the others because we have to scrub + * all the extents regardless of whether or not the fork + * is in btree format. + */ + +struct xfs_scrub_bmap_info { + struct xfs_scrub_context *sc; + xfs_fileoff_t lastoff; + bool is_rt; + bool is_shared; + int whichfork; +}; + +/* Look for a corresponding rmap for this irec. */ +static inline bool +xfs_scrub_bmap_get_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno, + uint64_t owner, + struct xfs_rmap_irec *rmap) +{ + xfs_fileoff_t offset; + unsigned int rflags = 0; + int has_rmap; + int error; + + if (info->whichfork == XFS_ATTR_FORK) + rflags |= XFS_RMAP_ATTR_FORK; + + /* + * CoW staging extents are owned (on disk) by the refcountbt, so + * their rmaps do not have offsets. + */ + if (info->whichfork == XFS_COW_FORK) + offset = 0; + else + offset = irec->br_startoff; + + /* + * If the caller thinks this could be a shared bmbt extent (IOWs, + * any data fork extent of a reflink inode) then we have to use the + * range rmap lookup to make sure we get the correct owner/offset. + */ + if (info->is_shared) { + error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, + owner, offset, rflags, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + goto out; + } + + /* + * Otherwise, use the (faster) regular lookup. + */ + error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, + offset, rflags, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + if (!has_rmap) + goto out; + + error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + +out: + if (!has_rmap) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return has_rmap; +} + +/* Make sure that we have rmapbt records for this extent. */ +STATIC void +xfs_scrub_bmap_xref_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno) +{ + struct xfs_rmap_irec rmap; + unsigned long long rmap_end; + uint64_t owner; + + if (!info->sc->sa.rmap_cur) + return; + + if (info->whichfork == XFS_COW_FORK) + owner = XFS_RMAP_OWN_COW; + else + owner = info->sc->ip->i_ino; + + /* Find the rmap record for this irec. */ + if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) + return; + + /* Check the rmap. */ + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; + if (rmap.rm_startblock > agbno || + agbno + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check the logical offsets if applicable. CoW staging extents + * don't track logical offsets since the mappings only exist in + * memory. + */ + if (info->whichfork != XFS_COW_FORK) { + rmap_end = (unsigned long long)rmap.rm_offset + + rmap.rm_blockcount; + if (rmap.rm_offset > irec->br_startoff || + irec->br_startoff + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, + info->whichfork, irec->br_startoff); + } + + if (rmap.rm_owner != owner) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check for discrepancies between the unwritten flag in the irec and + * the rmap. Note that the (in-memory) CoW fork distinguishes between + * unwritten and written extents, but we don't track that in the rmap + * records because the blocks are owned (on-disk) by the refcountbt, + * which doesn't track unwritten state. + */ + if (owner != XFS_RMAP_OWN_COW && + irec->br_state == XFS_EXT_UNWRITTEN && + !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (info->whichfork == XFS_ATTR_FORK && + !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); +} + +/* Cross-reference a single rtdev extent record. */ +STATIC void +xfs_scrub_bmap_rt_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, + irec->br_blockcount); +} + +/* Cross-reference a single datadev extent record. */ +STATIC void +xfs_scrub_bmap_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = info->sc->mp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t len; + int error; + + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); + len = irec->br_blockcount; + + error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); + if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, + irec->br_startoff, &error)) + return; + + xfs_scrub_xref_is_used_space(info->sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); + xfs_scrub_bmap_xref_rmap(info, irec, agbno); + switch (info->whichfork) { + case XFS_DATA_FORK: + if (xfs_is_reflink_inode(info->sc->ip)) + break; + /* fall through */ + case XFS_ATTR_FORK: + xfs_scrub_xref_is_not_shared(info->sc, agbno, + irec->br_blockcount); + break; + case XFS_COW_FORK: + xfs_scrub_xref_is_cow_staging(info->sc, agbno, + irec->br_blockcount); + break; + } + + xfs_scrub_ag_free(info->sc, &info->sc->sa); +} + +/* Scrub a single extent record. */ +STATIC int +xfs_scrub_bmap_extent( + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = info->sc->mp; + struct xfs_buf *bp = NULL; + xfs_filblks_t end; + int error = 0; + + if (cur) + xfs_btree_get_block(cur, 0, &bp); + + /* + * Check for out-of-order extents. This record could have come + * from the incore list, for which there is no ordering check. + */ + if (irec->br_startoff < info->lastoff) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* There should never be a "hole" extent in either extent list. */ + if (irec->br_startblock == HOLESTARTBLOCK) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check for delalloc extents. We never iterate the ones in the + * in-core extent scan, and we should never see these in the bmbt. + */ + if (isnullstartblock(irec->br_startblock)) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* Make sure the extent points to a valid place. */ + if (irec->br_blockcount > MAXEXTLEN) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + end = irec->br_startblock + irec->br_blockcount - 1; + if (info->is_rt && + (!xfs_verify_rtbno(mp, irec->br_startblock) || + !xfs_verify_rtbno(mp, end))) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + if (!info->is_rt && + (!xfs_verify_fsbno(mp, irec->br_startblock) || + !xfs_verify_fsbno(mp, end) || + XFS_FSB_TO_AGNO(mp, irec->br_startblock) != + XFS_FSB_TO_AGNO(mp, end))) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* We don't allow unwritten extents on attr forks. */ + if (irec->br_state == XFS_EXT_UNWRITTEN && + info->whichfork == XFS_ATTR_FORK) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (info->is_rt) + xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); + else + xfs_scrub_bmap_extent_xref(info, ip, cur, irec); + + info->lastoff = irec->br_startoff + irec->br_blockcount; + return error; +} + +/* Scrub a bmbt record. */ +STATIC int +xfs_scrub_bmapbt_rec( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec) +{ + struct xfs_bmbt_irec irec; + struct xfs_scrub_bmap_info *info = bs->private; + struct xfs_inode *ip = bs->cur->bc_private.b.ip; + struct xfs_buf *bp = NULL; + struct xfs_btree_block *block; + uint64_t owner; + int i; + + /* + * Check the owners of the btree blocks up to the level below + * the root since the verifiers don't do that. + */ + if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) && + bs->cur->bc_ptrs[0] == 1) { + for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { + block = xfs_btree_get_block(bs->cur, i, &bp); + owner = be64_to_cpu(block->bb_u.l.bb_owner); + if (owner != ip->i_ino) + xfs_scrub_fblock_set_corrupt(bs->sc, + info->whichfork, 0); + } + } + + /* Set up the in-core record and scrub it. */ + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec); +} + +/* Scan the btree records. */ +STATIC int +xfs_scrub_bmap_btree( + struct xfs_scrub_context *sc, + int whichfork, + struct xfs_scrub_bmap_info *info) +{ + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = sc->ip; + struct xfs_btree_cur *cur; + int error; + + cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); + error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : + XFS_BTREE_NOERROR); + return error; +} + +struct xfs_scrub_bmap_check_rmap_info { + struct xfs_scrub_context *sc; + int whichfork; + struct xfs_iext_cursor icur; +}; + +/* Can we find bmaps that fit this rmap? */ +STATIC int +xfs_scrub_bmap_check_rmap( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_bmbt_irec irec; + struct xfs_scrub_bmap_check_rmap_info *sbcri = priv; + struct xfs_ifork *ifp; + struct xfs_scrub_context *sc = sbcri->sc; + bool have_map; + + /* Is this even the right fork? */ + if (rec->rm_owner != sc->ip->i_ino) + return 0; + if ((sbcri->whichfork == XFS_ATTR_FORK) ^ + !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) + return 0; + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) + return 0; + + /* Now look up the bmbt record. */ + ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork); + if (!ifp) { + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + goto out; + } + have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, + &sbcri->icur, &irec); + if (!have_map) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + /* + * bmap extent record lengths are constrained to 2^21 blocks in length + * because of space constraints in the on-disk metadata structure. + * However, rmap extent record lengths are constrained only by AG + * length, so we have to loop through the bmbt to make sure that the + * entire rmap is covered by bmbt records. + */ + while (have_map) { + if (irec.br_startoff != rec->rm_offset) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, + cur->bc_private.a.agno, rec->rm_startblock)) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (irec.br_blockcount > rec->rm_blockcount) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + break; + rec->rm_startblock += irec.br_blockcount; + rec->rm_offset += irec.br_blockcount; + rec->rm_blockcount -= irec.br_blockcount; + if (rec->rm_blockcount == 0) + break; + have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); + if (!have_map) + xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + rec->rm_offset); + } + +out: + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return XFS_BTREE_QUERY_RANGE_ABORT; + return 0; +} + +/* Make sure each rmap has a corresponding bmbt entry. */ +STATIC int +xfs_scrub_bmap_check_ag_rmaps( + struct xfs_scrub_context *sc, + int whichfork, + xfs_agnumber_t agno) +{ + struct xfs_scrub_bmap_check_rmap_info sbcri; + struct xfs_btree_cur *cur; + struct xfs_buf *agf; + int error; + + error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf); + if (error) + return error; + + cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno); + if (!cur) { + error = -ENOMEM; + goto out_agf; + } + + sbcri.sc = sc; + sbcri.whichfork = whichfork; + error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) + error = 0; + + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); +out_agf: + xfs_trans_brelse(sc->tp, agf); + return error; +} + +/* Make sure each rmap has a corresponding bmbt entry. */ +STATIC int +xfs_scrub_bmap_check_rmaps( + struct xfs_scrub_context *sc, + int whichfork) +{ + loff_t size; + xfs_agnumber_t agno; + int error; + + if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) || + whichfork == XFS_COW_FORK || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return 0; + + /* Don't support realtime rmap checks yet. */ + if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK) + return 0; + + /* + * Only do this for complex maps that are in btree format, or for + * situations where we would seem to have a size but zero extents. + * The inode repair code can zap broken iforks, which means we have + * to flag this bmap as corrupt if there are rmaps that need to be + * reattached. + */ + switch (whichfork) { + case XFS_DATA_FORK: + size = i_size_read(VFS_I(sc->ip)); + break; + case XFS_ATTR_FORK: + size = XFS_IFORK_Q(sc->ip); + break; + default: + size = 0; + break; + } + if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE && + (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0)) + return 0; + + for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) { + error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno); + if (error) + return error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + break; + } + + return 0; +} + +/* + * Scrub an inode fork's block mappings. + * + * First we scan every record in every btree block, if applicable. + * Then we unconditionally scan the incore extent cache. + */ +STATIC int +xfs_scrub_bmap( + struct xfs_scrub_context *sc, + int whichfork) +{ + struct xfs_bmbt_irec irec; + struct xfs_scrub_bmap_info info = { NULL }; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = sc->ip; + struct xfs_ifork *ifp; + xfs_fileoff_t endoff; + struct xfs_iext_cursor icur; + int error = 0; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); + info.whichfork = whichfork; + info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); + info.sc = sc; + + switch (whichfork) { + case XFS_COW_FORK: + /* Non-existent CoW forks are ignorable. */ + if (!ifp) + goto out; + /* No CoW forks on non-reflink inodes/filesystems. */ + if (!xfs_is_reflink_inode(ip)) { + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + goto out; + } + break; + case XFS_ATTR_FORK: + if (!ifp) + goto out_check_rmap; + if (!xfs_sb_version_hasattr(&mp->m_sb) && + !xfs_sb_version_hasattr2(&mp->m_sb)) + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + break; + default: + ASSERT(whichfork == XFS_DATA_FORK); + break; + } + + /* Check the fork values */ + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_UUID: + case XFS_DINODE_FMT_DEV: + case XFS_DINODE_FMT_LOCAL: + /* No mappings to check. */ + goto out; + case XFS_DINODE_FMT_EXTENTS: + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + goto out; + } + break; + case XFS_DINODE_FMT_BTREE: + if (whichfork == XFS_COW_FORK) { + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + goto out; + } + + error = xfs_scrub_bmap_btree(sc, whichfork, &info); + if (error) + goto out; + break; + default: + xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + goto out; + } + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Now try to scrub the in-memory extent list. */ + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(sc->tp, ip, whichfork); + if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) + goto out; + } + + /* Find the offset of the last extent in the mapping. */ + error = xfs_bmap_last_offset(ip, &endoff, whichfork); + if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) + goto out; + + /* Scrub extent records. */ + info.lastoff = 0; + ifp = XFS_IFORK_PTR(ip, whichfork); + for_each_xfs_iext(ifp, &icur, &irec) { + if (xfs_scrub_should_terminate(sc, &error)) + break; + if (isnullstartblock(irec.br_startblock)) + continue; + if (irec.br_startoff >= endoff) { + xfs_scrub_fblock_set_corrupt(sc, whichfork, + irec.br_startoff); + goto out; + } + error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec); + if (error) + goto out; + } + +out_check_rmap: + error = xfs_scrub_bmap_check_rmaps(sc, whichfork); + if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error)) + goto out; +out: + return error; +} + +/* Scrub an inode's data fork. */ +int +xfs_scrub_bmap_data( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_bmap(sc, XFS_DATA_FORK); +} + +/* Scrub an inode's attr fork. */ +int +xfs_scrub_bmap_attr( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_bmap(sc, XFS_ATTR_FORK); +} + +/* Scrub an inode's CoW fork. */ +int +xfs_scrub_bmap_cow( + struct xfs_scrub_context *sc) +{ + if (!xfs_is_reflink_inode(sc->ip)) + return -ENOENT; + + return xfs_scrub_bmap(sc, XFS_COW_FORK); +} diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c new file mode 100644 index 000000000000..54218168c8f9 --- /dev/null +++ b/fs/xfs/scrub/btree.c @@ -0,0 +1,676 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* btree scrubbing */ + +/* + * Check for btree operation errors. See the section about handling + * operational errors in common.c. + */ +static bool +__xfs_scrub_btree_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error, + __u32 errflag, + void *ret_ip) +{ + if (*error == 0) + return true; + + switch (*error) { + case -EDEADLOCK: + /* Used to restart an op with deadlock avoidance. */ + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + break; + case -EFSBADCRC: + case -EFSCORRUPTED: + /* Note the badness but don't abort. */ + sc->sm->sm_flags |= errflag; + *error = 0; + /* fall through */ + default: + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, + *error, ret_ip); + else + trace_xfs_scrub_btree_op_error(sc, cur, level, + *error, ret_ip); + break; + } + return false; +} + +bool +xfs_scrub_btree_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_btree_xref_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + +/* Record btree block corruption. */ +static void +__xfs_scrub_btree_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + __u32 errflag, + void *ret_ip) +{ + sc->sm->sm_flags |= errflag; + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + trace_xfs_scrub_ifork_btree_error(sc, cur, level, + ret_ip); + else + trace_xfs_scrub_btree_error(sc, cur, level, + ret_ip); +} + +void +xfs_scrub_btree_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, + __return_address); +} + +void +xfs_scrub_btree_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, + __return_address); +} + +/* + * Make sure this record is in order and doesn't stray outside of the parent + * keys. + */ +STATIC void +xfs_scrub_btree_rec( + struct xfs_scrub_btree *bs) +{ + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_rec *rec; + union xfs_btree_key key; + union xfs_btree_key hkey; + union xfs_btree_key *keyp; + struct xfs_btree_block *block; + struct xfs_btree_block *keyblock; + struct xfs_buf *bp; + + block = xfs_btree_get_block(cur, 0, &bp); + rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + + trace_xfs_scrub_btree_rec(bs->sc, cur, 0); + + /* If this isn't the first record, are they in order? */ + if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) + xfs_scrub_btree_set_corrupt(bs->sc, cur, 0); + bs->firstrec = false; + memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); + + if (cur->bc_nlevels == 1) + return; + + /* Is this at least as large as the parent low key? */ + cur->bc_ops->init_key_from_rec(&key, rec); + keyblock = xfs_btree_get_block(cur, 1, &bp); + keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); + if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + return; + + /* Is this no larger than the parent high key? */ + cur->bc_ops->init_high_key_from_rec(&hkey, rec); + keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); + if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); +} + +/* + * Make sure this key is in order and doesn't stray outside of the parent + * keys. + */ +STATIC void +xfs_scrub_btree_key( + struct xfs_scrub_btree *bs, + int level) +{ + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_key *key; + union xfs_btree_key *keyp; + struct xfs_btree_block *block; + struct xfs_btree_block *keyblock; + struct xfs_buf *bp; + + block = xfs_btree_get_block(cur, level, &bp); + key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); + + trace_xfs_scrub_btree_key(bs->sc, cur, level); + + /* If this isn't the first key, are they in order? */ + if (!bs->firstkey[level] && + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + bs->firstkey[level] = false; + memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); + + if (level + 1 >= cur->bc_nlevels) + return; + + /* Is this at least as large as the parent low key? */ + keyblock = xfs_btree_get_block(cur, level + 1, &bp); + keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + return; + + /* Is this no larger than the parent high key? */ + key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); + keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); +} + +/* + * Check a btree pointer. Returns true if it's ok to use this pointer. + * Callers do not need to set the corrupt flag. + */ +static bool +xfs_scrub_btree_ptr_ok( + struct xfs_scrub_btree *bs, + int level, + union xfs_btree_ptr *ptr) +{ + bool res; + + /* A btree rooted in an inode has no block pointer to the root. */ + if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == bs->cur->bc_nlevels) + return true; + + /* Otherwise, check the pointers. */ + if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) + res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level); + else + res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level); + if (!res) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); + + return res; +} + +/* Check that a btree block's sibling matches what we expect it. */ +STATIC int +xfs_scrub_btree_block_check_sibling( + struct xfs_scrub_btree *bs, + int level, + int direction, + union xfs_btree_ptr *sibling) +{ + struct xfs_btree_cur *cur = bs->cur; + struct xfs_btree_block *pblock; + struct xfs_buf *pbp; + struct xfs_btree_cur *ncur = NULL; + union xfs_btree_ptr *pp; + int success; + int error; + + error = xfs_btree_dup_cursor(cur, &ncur); + if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) || + !ncur) + return error; + + /* + * If the pointer is null, we shouldn't be able to move the upper + * level pointer anywhere. + */ + if (xfs_btree_ptr_is_null(cur, sibling)) { + if (direction > 0) + error = xfs_btree_increment(ncur, level + 1, &success); + else + error = xfs_btree_decrement(ncur, level + 1, &success); + if (error == 0 && success) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + error = 0; + goto out; + } + + /* Increment upper level pointer. */ + if (direction > 0) + error = xfs_btree_increment(ncur, level + 1, &success); + else + error = xfs_btree_decrement(ncur, level + 1, &success); + if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error)) + goto out; + if (!success) { + xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1); + goto out; + } + + /* Compare upper level pointer to sibling pointer. */ + pblock = xfs_btree_get_block(ncur, level + 1, &pbp); + pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); + if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) + goto out; + if (pbp) + xfs_scrub_buffer_recheck(bs->sc, pbp); + + if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); +out: + xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); + return error; +} + +/* Check the siblings of a btree block. */ +STATIC int +xfs_scrub_btree_block_check_siblings( + struct xfs_scrub_btree *bs, + struct xfs_btree_block *block) +{ + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_ptr leftsib; + union xfs_btree_ptr rightsib; + int level; + int error = 0; + + xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB); + xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB); + level = xfs_btree_get_level(block); + + /* Root block should never have siblings. */ + if (level == cur->bc_nlevels - 1) { + if (!xfs_btree_ptr_is_null(cur, &leftsib) || + !xfs_btree_ptr_is_null(cur, &rightsib)) + xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + goto out; + } + + /* + * Does the left & right sibling pointers match the adjacent + * parent level pointers? + * (These function absorbs error codes for us.) + */ + error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib); + if (error) + return error; + error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib); + if (error) + return error; +out: + return error; +} + +struct check_owner { + struct list_head list; + xfs_daddr_t daddr; + int level; +}; + +/* + * Make sure this btree block isn't in the free list and that there's + * an rmap record for it. + */ +STATIC int +xfs_scrub_btree_check_block_owner( + struct xfs_scrub_btree *bs, + int level, + xfs_daddr_t daddr) +{ + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_btnum_t btnum; + bool init_sa; + int error = 0; + + if (!bs->cur) + return 0; + + btnum = bs->cur->bc_btnum; + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); + + init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; + if (init_sa) { + error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, + level, &error)) + return error; + } + + xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); + /* + * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we + * have to nullify it (to shut down further block owner checks) if + * self-xref encounters problems. + */ + if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) + bs->cur = NULL; + + xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); + if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) + bs->cur = NULL; + + if (init_sa) + xfs_scrub_ag_free(bs->sc, &bs->sc->sa); + + return error; +} + +/* Check the owner of a btree block. */ +STATIC int +xfs_scrub_btree_check_owner( + struct xfs_scrub_btree *bs, + int level, + struct xfs_buf *bp) +{ + struct xfs_btree_cur *cur = bs->cur; + struct check_owner *co; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) + return 0; + + /* + * We want to cross-reference each btree block with the bnobt + * and the rmapbt. We cannot cross-reference the bnobt or + * rmapbt while scanning the bnobt or rmapbt, respectively, + * because we cannot alter the cursor and we'd prefer not to + * duplicate cursors. Therefore, save the buffer daddr for + * later scanning. + */ + if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { + co = kmem_alloc(sizeof(struct check_owner), + KM_MAYFAIL | KM_NOFS); + if (!co) + return -ENOMEM; + co->level = level; + co->daddr = XFS_BUF_ADDR(bp); + list_add_tail(&co->list, &bs->to_check); + return 0; + } + + return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); +} + +/* + * Grab and scrub a btree block given a btree pointer. Returns block + * and buffer pointers (if applicable) if they're ok to use. + */ +STATIC int +xfs_scrub_btree_get_block( + struct xfs_scrub_btree *bs, + int level, + union xfs_btree_ptr *pp, + struct xfs_btree_block **pblock, + struct xfs_buf **pbp) +{ + void *failed_at; + int error; + + *pblock = NULL; + *pbp = NULL; + + error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock); + if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) || + !*pblock) + return error; + + xfs_btree_get_block(bs->cur, level, pbp); + if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) + failed_at = __xfs_btree_check_lblock(bs->cur, *pblock, + level, *pbp); + else + failed_at = __xfs_btree_check_sblock(bs->cur, *pblock, + level, *pbp); + if (failed_at) { + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); + return 0; + } + if (*pbp) + xfs_scrub_buffer_recheck(bs->sc, *pbp); + + /* + * Check the block's owner; this function absorbs error codes + * for us. + */ + error = xfs_scrub_btree_check_owner(bs, level, *pbp); + if (error) + return error; + + /* + * Check the block's siblings; this function absorbs error codes + * for us. + */ + return xfs_scrub_btree_block_check_siblings(bs, *pblock); +} + +/* + * Check that the low and high keys of this block match the keys stored + * in the parent block. + */ +STATIC void +xfs_scrub_btree_block_keys( + struct xfs_scrub_btree *bs, + int level, + struct xfs_btree_block *block) +{ + union xfs_btree_key block_keys; + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_key *high_bk; + union xfs_btree_key *parent_keys; + union xfs_btree_key *high_pk; + struct xfs_btree_block *parent_block; + struct xfs_buf *bp; + + if (level >= cur->bc_nlevels - 1) + return; + + /* Calculate the keys for this block. */ + xfs_btree_get_keys(cur, block, &block_keys); + + /* Obtain the parent's copy of the keys for this block. */ + parent_block = xfs_btree_get_block(cur, level + 1, &bp); + parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], + parent_block); + + if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + return; + + /* Get high keys */ + high_bk = xfs_btree_high_key_from_key(cur, &block_keys); + high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], + parent_block); + + if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) + xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); +} + +/* + * Visit all nodes and leaves of a btree. Check that all pointers and + * records are in order, that the keys reflect the records, and use a callback + * so that the caller can verify individual records. + */ +int +xfs_scrub_btree( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + xfs_scrub_btree_rec_fn scrub_fn, + struct xfs_owner_info *oinfo, + void *private) +{ + struct xfs_scrub_btree bs = { NULL }; + union xfs_btree_ptr ptr; + union xfs_btree_ptr *pp; + union xfs_btree_rec *recp; + struct xfs_btree_block *block; + int level; + struct xfs_buf *bp; + struct check_owner *co; + struct check_owner *n; + int i; + int error = 0; + + /* Initialize scrub state */ + bs.cur = cur; + bs.scrub_rec = scrub_fn; + bs.oinfo = oinfo; + bs.firstrec = true; + bs.private = private; + bs.sc = sc; + for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) + bs.firstkey[i] = true; + INIT_LIST_HEAD(&bs.to_check); + + /* Don't try to check a tree with a height we can't handle. */ + if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { + xfs_scrub_btree_set_corrupt(sc, cur, 0); + goto out; + } + + /* + * Load the root of the btree. The helper function absorbs + * error codes for us. + */ + level = cur->bc_nlevels - 1; + cur->bc_ops->init_ptr_from_cur(cur, &ptr); + if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + goto out; + error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp); + if (error || !block) + goto out; + + cur->bc_ptrs[level] = 1; + + while (level < cur->bc_nlevels) { + block = xfs_btree_get_block(cur, level, &bp); + + if (level == 0) { + /* End of leaf, pop back towards the root. */ + if (cur->bc_ptrs[level] > + be16_to_cpu(block->bb_numrecs)) { + xfs_scrub_btree_block_keys(&bs, level, block); + if (level < cur->bc_nlevels - 1) + cur->bc_ptrs[level + 1]++; + level++; + continue; + } + + /* Records in order for scrub? */ + xfs_scrub_btree_rec(&bs); + + /* Call out to the record checker. */ + recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + error = bs.scrub_rec(&bs, recp); + if (error) + break; + if (xfs_scrub_should_terminate(sc, &error) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + break; + + cur->bc_ptrs[level]++; + continue; + } + + /* End of node, pop back towards the root. */ + if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { + xfs_scrub_btree_block_keys(&bs, level, block); + if (level < cur->bc_nlevels - 1) + cur->bc_ptrs[level + 1]++; + level++; + continue; + } + + /* Keys in order for scrub? */ + xfs_scrub_btree_key(&bs, level); + + /* Drill another level deeper. */ + pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); + if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) { + cur->bc_ptrs[level]++; + continue; + } + level--; + error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp); + if (error || !block) + goto out; + + cur->bc_ptrs[level] = 1; + } + +out: + /* Process deferred owner checks on btree blocks. */ + list_for_each_entry_safe(co, n, &bs.to_check, list) { + if (!error && bs.cur) + error = xfs_scrub_btree_check_block_owner(&bs, + co->level, co->daddr); + list_del(&co->list); + kmem_free(co); + } + + return error; +} diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h new file mode 100644 index 000000000000..e2b868ede70b --- /dev/null +++ b/fs/xfs/scrub/btree.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_SCRUB_BTREE_H__ +#define __XFS_SCRUB_BTREE_H__ + +/* btree scrub */ + +/* Check for btree operation errors. */ +bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level, int *error); + +/* Check for btree xref operation errors. */ +bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level, + int *error); + +/* Check for btree corruption. */ +void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level); + +/* Check for btree xref discrepancies. */ +void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level); + +struct xfs_scrub_btree; +typedef int (*xfs_scrub_btree_rec_fn)( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec); + +struct xfs_scrub_btree { + /* caller-provided scrub state */ + struct xfs_scrub_context *sc; + struct xfs_btree_cur *cur; + xfs_scrub_btree_rec_fn scrub_rec; + struct xfs_owner_info *oinfo; + void *private; + + /* internal scrub state */ + union xfs_btree_rec lastrec; + bool firstrec; + union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; + bool firstkey[XFS_BTREE_MAXLEVELS]; + struct list_head to_check; +}; +int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + xfs_scrub_btree_rec_fn scrub_fn, + struct xfs_owner_info *oinfo, void *private); + +#endif /* __XFS_SCRUB_BTREE_H__ */ diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c new file mode 100644 index 000000000000..8ed91d5c868d --- /dev/null +++ b/fs/xfs/scrub/common.c @@ -0,0 +1,775 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_itable.h" +#include "xfs_alloc.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_log.h" +#include "xfs_trans_priv.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/btree.h" + +/* Common code for the metadata scrubbers. */ + +/* + * Handling operational errors. + * + * The *_process_error() family of functions are used to process error return + * codes from functions called as part of a scrub operation. + * + * If there's no error, we return true to tell the caller that it's ok + * to move on to the next check in its list. + * + * For non-verifier errors (e.g. ENOMEM) we return false to tell the + * caller that something bad happened, and we preserve *error so that + * the caller can return the *error up the stack to userspace. + * + * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting + * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words, + * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT, + * not via return codes. We return false to tell the caller that + * something bad happened. Since the error has been cleared, the caller + * will (presumably) return that zero and scrubbing will move on to + * whatever's next. + * + * ftrace can be used to record the precise metadata location and the + * approximate code location of the failed operation. + */ + +/* Check for operational errors. */ +static bool +__xfs_scrub_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error, + __u32 errflag, + void *ret_ip) +{ + switch (*error) { + case 0: + return true; + case -EDEADLOCK: + /* Used to restart an op with deadlock avoidance. */ + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + break; + case -EFSBADCRC: + case -EFSCORRUPTED: + /* Note the badness but don't abort. */ + sc->sm->sm_flags |= errflag; + *error = 0; + /* fall through */ + default: + trace_xfs_scrub_op_error(sc, agno, bno, *error, + ret_ip); + break; + } + return false; +} + +bool +xfs_scrub_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_xref_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + +/* Check for operational errors for a file offset. */ +static bool +__xfs_scrub_fblock_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error, + __u32 errflag, + void *ret_ip) +{ + switch (*error) { + case 0: + return true; + case -EDEADLOCK: + /* Used to restart an op with deadlock avoidance. */ + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + break; + case -EFSBADCRC: + case -EFSCORRUPTED: + /* Note the badness but don't abort. */ + sc->sm->sm_flags |= errflag; + *error = 0; + /* fall through */ + default: + trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, + ret_ip); + break; + } + return false; +} + +bool +xfs_scrub_fblock_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_fblock_xref_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + +/* + * Handling scrub corruption/optimization/warning checks. + * + * The *_set_{corrupt,preen,warning}() family of functions are used to + * record the presence of metadata that is incorrect (corrupt), could be + * optimized somehow (preen), or should be flagged for administrative + * review but is not incorrect (warn). + * + * ftrace can be used to record the precise metadata location and + * approximate code location of the failed check. + */ + +/* Record a block which could be optimized. */ +void +xfs_scrub_block_set_preen( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; + trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address); +} + +/* + * Record an inode which could be optimized. The trace data will + * include the block given by bp if bp is given; otherwise it will use + * the block location of the inode record itself. + */ +void +xfs_scrub_ino_set_preen( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; + trace_xfs_scrub_ino_preen(sc, ino, __return_address); +} + +/* Record a corrupt block. */ +void +xfs_scrub_block_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); +} + +/* Record a corruption while cross-referencing. */ +void +xfs_scrub_block_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); +} + +/* + * Record a corrupt inode. The trace data will include the block given + * by bp if bp is given; otherwise it will use the block location of the + * inode record itself. + */ +void +xfs_scrub_ino_set_corrupt( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_ino_error(sc, ino, __return_address); +} + +/* Record a corruption while cross-referencing with an inode. */ +void +xfs_scrub_ino_xref_set_corrupt( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_ino_error(sc, ino, __return_address); +} + +/* Record corruption in a block indexed by a file fork. */ +void +xfs_scrub_fblock_set_corrupt( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); +} + +/* Record a corruption while cross-referencing a fork block. */ +void +xfs_scrub_fblock_xref_set_corrupt( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); +} + +/* + * Warn about inodes that need administrative review but is not + * incorrect. + */ +void +xfs_scrub_ino_set_warning( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; + trace_xfs_scrub_ino_warning(sc, ino, __return_address); +} + +/* Warn about a block indexed by a file fork that needs review. */ +void +xfs_scrub_fblock_set_warning( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; + trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address); +} + +/* Signal an incomplete scrub. */ +void +xfs_scrub_set_incomplete( + struct xfs_scrub_context *sc) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; + trace_xfs_scrub_incomplete(sc, __return_address); +} + +/* + * rmap scrubbing -- compute the number of blocks with a given owner, + * at least according to the reverse mapping data. + */ + +struct xfs_scrub_rmap_ownedby_info { + struct xfs_owner_info *oinfo; + xfs_filblks_t *blocks; +}; + +STATIC int +xfs_scrub_count_rmap_ownedby_irec( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_rmap_ownedby_info *sroi = priv; + bool irec_attr; + bool oinfo_attr; + + irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; + oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; + + if (rec->rm_owner != sroi->oinfo->oi_owner) + return 0; + + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) + (*sroi->blocks) += rec->rm_blockcount; + + return 0; +} + +/* + * Calculate the number of blocks the rmap thinks are owned by something. + * The caller should pass us an rmapbt cursor. + */ +int +xfs_scrub_count_rmap_ownedby_ag( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks) +{ + struct xfs_scrub_rmap_ownedby_info sroi; + + sroi.oinfo = oinfo; + *blocks = 0; + sroi.blocks = blocks; + + return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, + &sroi); +} + +/* + * AG scrubbing + * + * These helpers facilitate locking an allocation group's header + * buffers, setting up cursors for all btrees that are present, and + * cleaning everything up once we're through. + */ + +/* Decide if we want to return an AG header read failure. */ +static inline bool +want_ag_read_header_failure( + struct xfs_scrub_context *sc, + unsigned int type) +{ + /* Return all AG header read failures when scanning btrees. */ + if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && + sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL && + sc->sm->sm_type != XFS_SCRUB_TYPE_AGI) + return true; + /* + * If we're scanning a given type of AG header, we only want to + * see read failures from that specific header. We'd like the + * other headers to cross-check them, but this isn't required. + */ + if (sc->sm->sm_type == type) + return true; + return false; +} + +/* + * Grab all the headers for an AG. + * + * The headers should be released by xfs_scrub_ag_free, but as a fail + * safe we attach all the buffers we grab to the scrub transaction so + * they'll all be freed when we cancel it. + */ +int +xfs_scrub_ag_read_headers( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + struct xfs_buf **agi, + struct xfs_buf **agf, + struct xfs_buf **agfl) +{ + struct xfs_mount *mp = sc->mp; + int error; + + error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) + goto out; + + error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) + goto out; + + error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) + goto out; + error = 0; +out: + return error; +} + +/* Release all the AG btree cursors. */ +void +xfs_scrub_ag_btcur_free( + struct xfs_scrub_ag *sa) +{ + if (sa->refc_cur) + xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); + if (sa->rmap_cur) + xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR); + if (sa->fino_cur) + xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR); + if (sa->ino_cur) + xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR); + if (sa->cnt_cur) + xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR); + if (sa->bno_cur) + xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR); + + sa->refc_cur = NULL; + sa->rmap_cur = NULL; + sa->fino_cur = NULL; + sa->ino_cur = NULL; + sa->bno_cur = NULL; + sa->cnt_cur = NULL; +} + +/* Initialize all the btree cursors for an AG. */ +int +xfs_scrub_ag_btcur_init( + struct xfs_scrub_context *sc, + struct xfs_scrub_ag *sa) +{ + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sa->agno; + + if (sa->agf_bp) { + /* Set up a bnobt cursor for cross-referencing. */ + sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, + agno, XFS_BTNUM_BNO); + if (!sa->bno_cur) + goto err; + + /* Set up a cntbt cursor for cross-referencing. */ + sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, + agno, XFS_BTNUM_CNT); + if (!sa->cnt_cur) + goto err; + } + + /* Set up a inobt cursor for cross-referencing. */ + if (sa->agi_bp) { + sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, + agno, XFS_BTNUM_INO); + if (!sa->ino_cur) + goto err; + } + + /* Set up a finobt cursor for cross-referencing. */ + if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) { + sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, + agno, XFS_BTNUM_FINO); + if (!sa->fino_cur) + goto err; + } + + /* Set up a rmapbt cursor for cross-referencing. */ + if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) { + sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, + agno); + if (!sa->rmap_cur) + goto err; + } + + /* Set up a refcountbt cursor for cross-referencing. */ + if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) { + sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, + sa->agf_bp, agno, NULL); + if (!sa->refc_cur) + goto err; + } + + return 0; +err: + return -ENOMEM; +} + +/* Release the AG header context and btree cursors. */ +void +xfs_scrub_ag_free( + struct xfs_scrub_context *sc, + struct xfs_scrub_ag *sa) +{ + xfs_scrub_ag_btcur_free(sa); + if (sa->agfl_bp) { + xfs_trans_brelse(sc->tp, sa->agfl_bp); + sa->agfl_bp = NULL; + } + if (sa->agf_bp) { + xfs_trans_brelse(sc->tp, sa->agf_bp); + sa->agf_bp = NULL; + } + if (sa->agi_bp) { + xfs_trans_brelse(sc->tp, sa->agi_bp); + sa->agi_bp = NULL; + } + sa->agno = NULLAGNUMBER; +} + +/* + * For scrub, grab the AGI and the AGF headers, in that order. Locking + * order requires us to get the AGI before the AGF. We use the + * transaction to avoid deadlocking on crosslinked metadata buffers; + * either the caller passes one in (bmap scrub) or we have to create a + * transaction ourselves. + */ +int +xfs_scrub_ag_init( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + struct xfs_scrub_ag *sa) +{ + int error; + + sa->agno = agno; + error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp, + &sa->agf_bp, &sa->agfl_bp); + if (error) + return error; + + return xfs_scrub_ag_btcur_init(sc, sa); +} + +/* Per-scrubber setup functions */ + +/* Set us up with a transaction and an empty context. */ +int +xfs_scrub_setup_fs( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp); +} + +/* Set us up with AG headers and btree cursors. */ +int +xfs_scrub_setup_ag_btree( + struct xfs_scrub_context *sc, + struct xfs_inode *ip, + bool force_log) +{ + struct xfs_mount *mp = sc->mp; + int error; + + /* + * If the caller asks us to checkpont the log, do so. This + * expensive operation should be performed infrequently and only + * as a last resort. Any caller that sets force_log should + * document why they need to do so. + */ + if (force_log) { + error = xfs_scrub_checkpoint_log(mp); + if (error) + return error; + } + + error = xfs_scrub_setup_fs(sc, ip); + if (error) + return error; + + return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa); +} + +/* Push everything out of the log onto disk. */ +int +xfs_scrub_checkpoint_log( + struct xfs_mount *mp) +{ + int error; + + error = xfs_log_force(mp, XFS_LOG_SYNC); + if (error) + return error; + xfs_ail_push_all_sync(mp->m_ail); + return 0; +} + +/* + * Given an inode and the scrub control structure, grab either the + * inode referenced in the control structure or the inode passed in. + * The inode is not locked. + */ +int +xfs_scrub_get_inode( + struct xfs_scrub_context *sc, + struct xfs_inode *ip_in) +{ + struct xfs_imap imap; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = NULL; + int error; + + /* We want to scan the inode we already had opened. */ + if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { + sc->ip = ip_in; + return 0; + } + + /* Look up the inode, see if the generation number matches. */ + if (xfs_internal_inum(mp, sc->sm->sm_ino)) + return -ENOENT; + error = xfs_iget(mp, NULL, sc->sm->sm_ino, + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); + switch (error) { + case -ENOENT: + /* Inode doesn't exist, just bail out. */ + return error; + case 0: + /* Got an inode, continue. */ + break; + case -EINVAL: + /* + * -EINVAL with IGET_UNTRUSTED could mean one of several + * things: userspace gave us an inode number that doesn't + * correspond to fs space, or doesn't have an inobt entry; + * or it could simply mean that the inode buffer failed the + * read verifiers. + * + * Try just the inode mapping lookup -- if it succeeds, then + * the inode buffer verifier failed and something needs fixing. + * Otherwise, we really couldn't find it so tell userspace + * that it no longer exists. + */ + error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); + if (error) + return -ENOENT; + error = -EFSCORRUPTED; + /* fall through */ + default: + trace_xfs_scrub_op_error(sc, + XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), + XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), + error, __return_address); + return error; + } + if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { + iput(VFS_I(ip)); + return -ENOENT; + } + + sc->ip = ip; + return 0; +} + +/* Set us up to scrub a file's contents. */ +int +xfs_scrub_setup_inode_contents( + struct xfs_scrub_context *sc, + struct xfs_inode *ip, + unsigned int resblks) +{ + struct xfs_mount *mp = sc->mp; + int error; + + error = xfs_scrub_get_inode(sc, ip); + if (error) + return error; + + /* Got the inode, lock it and we're ready to go. */ + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + xfs_ilock(sc->ip, sc->ilock_flags); + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); + if (error) + goto out; + sc->ilock_flags |= XFS_ILOCK_EXCL; + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); + +out: + /* scrub teardown will unlock and release the inode for us */ + return error; +} + +/* + * Predicate that decides if we need to evaluate the cross-reference check. + * If there was an error accessing the cross-reference btree, just delete + * the cursor and skip the check. + */ +bool +xfs_scrub_should_check_xref( + struct xfs_scrub_context *sc, + int *error, + struct xfs_btree_cur **curpp) +{ + if (*error == 0) + return true; + + if (curpp) { + /* If we've already given up on xref, just bail out. */ + if (!*curpp) + return false; + + /* xref error, delete cursor and bail out. */ + xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); + *curpp = NULL; + } + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; + trace_xfs_scrub_xref_error(sc, *error, __return_address); + + /* + * Errors encountered during cross-referencing with another + * data structure should not cause this scrubber to abort. + */ + *error = 0; + return false; +} + +/* Run the structure verifiers on in-memory buffers to detect bad memory. */ +void +xfs_scrub_buffer_recheck( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + if (bp->b_ops == NULL) { + xfs_scrub_block_set_corrupt(sc, bp); + return; + } + if (bp->b_ops->verify_struct == NULL) { + xfs_scrub_set_incomplete(sc); + return; + } + fa = bp->b_ops->verify_struct(bp); + if (!fa) + return; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, fa); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h new file mode 100644 index 000000000000..deaf60400981 --- /dev/null +++ b/fs/xfs/scrub/common.h @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_SCRUB_COMMON_H__ +#define __XFS_SCRUB_COMMON_H__ + +/* + * We /could/ terminate a scrub/repair operation early. If we're not + * in a good place to continue (fatal signal, etc.) then bail out. + * Note that we're careful not to make any judgements about *error. + */ +static inline bool +xfs_scrub_should_terminate( + struct xfs_scrub_context *sc, + int *error) +{ + if (fatal_signal_pending(current)) { + if (*error == 0) + *error = -EAGAIN; + return true; + } + return false; +} + +/* + * Grab an empty transaction so that we can re-grab locked buffers if + * one of our btrees turns out to be cyclic. + */ +static inline int +xfs_scrub_trans_alloc( + struct xfs_scrub_metadata *sm, + struct xfs_mount *mp, + struct xfs_trans **tpp) +{ + return xfs_trans_alloc_empty(mp, tpp); +} + +bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno, + xfs_agblock_t bno, int *error); +bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, + xfs_fileoff_t offset, int *error); + +bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, + xfs_agnumber_t agno, xfs_agblock_t bno, int *error); +bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset, int *error); + +void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, + struct xfs_buf *bp); +void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino); + +void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_buf *bp); +void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino); +void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, + xfs_fileoff_t offset); + +void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_buf *bp); +void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, + xfs_ino_t ino); +void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset); + +void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino); +void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, + xfs_fileoff_t offset); + +void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); +int xfs_scrub_checkpoint_log(struct xfs_mount *mp); + +/* Are we set up for a cross-referencing check? */ +bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, + struct xfs_btree_cur **curpp); + +/* Setup functions */ +int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); +int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_inode(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_directory(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +int xfs_scrub_setup_parent(struct xfs_scrub_context *sc, + struct xfs_inode *ip); +#ifdef CONFIG_XFS_RT +int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip); +#else +static inline int +xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip) +{ + return -ENOENT; +} +#endif +#ifdef CONFIG_XFS_QUOTA +int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip); +#else +static inline int +xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip) +{ + return -ENOENT; +} +#endif + +void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa); +int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno, + struct xfs_scrub_ag *sa); +int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno, + struct xfs_buf **agi, struct xfs_buf **agf, + struct xfs_buf **agfl); +void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa); +int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc, + struct xfs_scrub_ag *sa); +int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc, + int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, + void *), + void *priv); +int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks); + +int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, + struct xfs_inode *ip, bool force_log); +int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); +int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, + struct xfs_inode *ip, unsigned int resblks); +void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); + +#endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c new file mode 100644 index 000000000000..bffdb7dc09bf --- /dev/null +++ b/fs/xfs/scrub/dabtree.c @@ -0,0 +1,613 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/dabtree.h" + +/* Directory/Attribute Btree */ + +/* + * Check for da btree operation errors. See the section about handling + * operational errors in common.c. + */ +bool +xfs_scrub_da_process_error( + struct xfs_scrub_da_btree *ds, + int level, + int *error) +{ + struct xfs_scrub_context *sc = ds->sc; + + if (*error == 0) + return true; + + switch (*error) { + case -EDEADLOCK: + /* Used to restart an op with deadlock avoidance. */ + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + break; + case -EFSBADCRC: + case -EFSCORRUPTED: + /* Note the badness but don't abort. */ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + *error = 0; + /* fall through */ + default: + trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + *error, __return_address); + break; + } + return false; +} + +/* + * Check for da btree corruption. See the section about handling + * operational errors in common.c. + */ +void +xfs_scrub_da_set_corrupt( + struct xfs_scrub_da_btree *ds, + int level) +{ + struct xfs_scrub_context *sc = ds->sc; + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + + trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + __return_address); +} + +/* Find an entry at a certain level in a da btree. */ +STATIC void * +xfs_scrub_da_btree_entry( + struct xfs_scrub_da_btree *ds, + int level, + int rec) +{ + char *ents; + struct xfs_da_state_blk *blk; + void *baddr; + + /* Dispatch the entry finding function. */ + blk = &ds->state->path.blk[level]; + baddr = blk->bp->b_addr; + switch (blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + ents = (char *)xfs_attr3_leaf_entryp(baddr); + return ents + (rec * sizeof(struct xfs_attr_leaf_entry)); + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); + return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr); + return ents + (rec * sizeof(struct xfs_dir2_leaf_entry)); + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr); + return ents + (rec * sizeof(struct xfs_da_node_entry)); + } + + return NULL; +} + +/* Scrub a da btree hash (key). */ +int +xfs_scrub_da_btree_hash( + struct xfs_scrub_da_btree *ds, + int level, + __be32 *hashp) +{ + struct xfs_da_state_blk *blks; + struct xfs_da_node_entry *entry; + xfs_dahash_t hash; + xfs_dahash_t parent_hash; + + /* Is this hash in order? */ + hash = be32_to_cpu(*hashp); + if (hash < ds->hashes[level]) + xfs_scrub_da_set_corrupt(ds, level); + ds->hashes[level] = hash; + + if (level == 0) + return 0; + + /* Is this hash no larger than the parent hash? */ + blks = ds->state->path.blk; + entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index); + parent_hash = be32_to_cpu(entry->hashval); + if (parent_hash < hash) + xfs_scrub_da_set_corrupt(ds, level); + + return 0; +} + +/* + * Check a da btree pointer. Returns true if it's ok to use this + * pointer. + */ +STATIC bool +xfs_scrub_da_btree_ptr_ok( + struct xfs_scrub_da_btree *ds, + int level, + xfs_dablk_t blkno) +{ + if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { + xfs_scrub_da_set_corrupt(ds, level); + return false; + } + + return true; +} + +/* + * The da btree scrubber can handle leaf1 blocks as a degenerate + * form of leafn blocks. Since the regular da code doesn't handle + * leaf1, we must multiplex the verifiers. + */ +static void +xfs_scrub_da_btree_read_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + bp->b_ops->verify_read(bp); + return; + default: + /* + * xfs_da3_node_buf_ops already know how to handle + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. + */ + bp->b_ops = &xfs_da3_node_buf_ops; + bp->b_ops->verify_read(bp); + return; + } +} +static void +xfs_scrub_da_btree_write_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + bp->b_ops->verify_write(bp); + return; + default: + /* + * xfs_da3_node_buf_ops already know how to handle + * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. + */ + bp->b_ops = &xfs_da3_node_buf_ops; + bp->b_ops->verify_write(bp); + return; + } +} +static void * +xfs_scrub_da_btree_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + bp->b_ops = &xfs_da3_node_buf_ops; + return bp->b_ops->verify_struct(bp); + } +} + +static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { + .name = "xfs_scrub_da_btree", + .verify_read = xfs_scrub_da_btree_read_verify, + .verify_write = xfs_scrub_da_btree_write_verify, + .verify_struct = xfs_scrub_da_btree_verify, +}; + +/* Check a block's sibling. */ +STATIC int +xfs_scrub_da_btree_block_check_sibling( + struct xfs_scrub_da_btree *ds, + int level, + int direction, + xfs_dablk_t sibling) +{ + int retval; + int error; + + memcpy(&ds->state->altpath, &ds->state->path, + sizeof(ds->state->altpath)); + + /* + * If the pointer is null, we shouldn't be able to move the upper + * level pointer anywhere. + */ + if (sibling == 0) { + error = xfs_da3_path_shift(ds->state, &ds->state->altpath, + direction, false, &retval); + if (error == 0 && retval == 0) + xfs_scrub_da_set_corrupt(ds, level); + error = 0; + goto out; + } + + /* Move the alternate cursor one block in the direction given. */ + error = xfs_da3_path_shift(ds->state, &ds->state->altpath, + direction, false, &retval); + if (!xfs_scrub_da_process_error(ds, level, &error)) + return error; + if (retval) { + xfs_scrub_da_set_corrupt(ds, level); + return error; + } + if (ds->state->altpath.blk[level].bp) + xfs_scrub_buffer_recheck(ds->sc, + ds->state->altpath.blk[level].bp); + + /* Compare upper level pointer to sibling pointer. */ + if (ds->state->altpath.blk[level].blkno != sibling) + xfs_scrub_da_set_corrupt(ds, level); + xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); +out: + return error; +} + +/* Check a block's sibling pointers. */ +STATIC int +xfs_scrub_da_btree_block_check_siblings( + struct xfs_scrub_da_btree *ds, + int level, + struct xfs_da_blkinfo *hdr) +{ + xfs_dablk_t forw; + xfs_dablk_t back; + int error = 0; + + forw = be32_to_cpu(hdr->forw); + back = be32_to_cpu(hdr->back); + + /* Top level blocks should not have sibling pointers. */ + if (level == 0) { + if (forw != 0 || back != 0) + xfs_scrub_da_set_corrupt(ds, level); + return 0; + } + + /* + * Check back (left) and forw (right) pointers. These functions + * absorb error codes for us. + */ + error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back); + if (error) + goto out; + error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw); + +out: + memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); + return error; +} + +/* Load a dir/attribute block from a btree. */ +STATIC int +xfs_scrub_da_btree_block( + struct xfs_scrub_da_btree *ds, + int level, + xfs_dablk_t blkno) +{ + struct xfs_da_state_blk *blk; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_da3_blkinfo *hdr3; + struct xfs_da_args *dargs = &ds->dargs; + struct xfs_inode *ip = ds->dargs.dp; + xfs_ino_t owner; + int *pmaxrecs; + struct xfs_da3_icnode_hdr nodehdr; + int error = 0; + + blk = &ds->state->path.blk[level]; + ds->state->path.active = level + 1; + + /* Release old block. */ + if (blk->bp) { + xfs_trans_brelse(dargs->trans, blk->bp); + blk->bp = NULL; + } + + /* Check the pointer. */ + blk->blkno = blkno; + if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno)) + goto out_nobuf; + + /* Read the buffer. */ + error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2, + &blk->bp, dargs->whichfork, + &xfs_scrub_da_btree_buf_ops); + if (!xfs_scrub_da_process_error(ds, level, &error)) + goto out_nobuf; + if (blk->bp) + xfs_scrub_buffer_recheck(ds->sc, blk->bp); + + /* + * We didn't find a dir btree root block, which means that + * there's no LEAF1/LEAFN tree (at least not where it's supposed + * to be), so jump out now. + */ + if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && + blk->bp == NULL) + goto out_nobuf; + + /* It's /not/ ok for attr trees not to have a da btree. */ + if (blk->bp == NULL) { + xfs_scrub_da_set_corrupt(ds, level); + goto out_nobuf; + } + + hdr3 = blk->bp->b_addr; + blk->magic = be16_to_cpu(hdr3->hdr.magic); + pmaxrecs = &ds->maxrecs[level]; + + /* We only started zeroing the header on v5 filesystems. */ + if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad) + xfs_scrub_da_set_corrupt(ds, level); + + /* Check the owner. */ + if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) { + owner = be64_to_cpu(hdr3->owner); + if (owner != ip->i_ino) + xfs_scrub_da_set_corrupt(ds, level); + } + + /* Check the siblings. */ + error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr); + if (error) + goto out; + + /* Interpret the buffer. */ + switch (blk->magic) { + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_ATTR_LEAF_BUF); + blk->magic = XFS_ATTR_LEAF_MAGIC; + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); + if (ds->tree_level != 0) + xfs_scrub_da_set_corrupt(ds, level); + break; + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DIR_LEAFN_BUF); + blk->magic = XFS_DIR2_LEAFN_MAGIC; + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); + if (ds->tree_level != 0) + xfs_scrub_da_set_corrupt(ds, level); + break; + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DIR_LEAF1_BUF); + blk->magic = XFS_DIR2_LEAF1_MAGIC; + blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); + if (ds->tree_level != 0) + xfs_scrub_da_set_corrupt(ds, level); + break; + case XFS_DA_NODE_MAGIC: + case XFS_DA3_NODE_MAGIC: + xfs_trans_buf_set_type(dargs->trans, blk->bp, + XFS_BLFT_DA_NODE_BUF); + blk->magic = XFS_DA_NODE_MAGIC; + node = blk->bp->b_addr; + ip->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = ip->d_ops->node_tree_p(node); + *pmaxrecs = nodehdr.count; + blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); + if (level == 0) { + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { + xfs_scrub_da_set_corrupt(ds, level); + goto out_freebp; + } + ds->tree_level = nodehdr.level; + } else { + if (ds->tree_level != nodehdr.level) { + xfs_scrub_da_set_corrupt(ds, level); + goto out_freebp; + } + } + + /* XXX: Check hdr3.pad32 once we know how to fix it. */ + break; + default: + xfs_scrub_da_set_corrupt(ds, level); + goto out_freebp; + } + +out: + return error; +out_freebp: + xfs_trans_brelse(dargs->trans, blk->bp); + blk->bp = NULL; +out_nobuf: + blk->blkno = 0; + return error; +} + +/* Visit all nodes and leaves of a da btree. */ +int +xfs_scrub_da_btree( + struct xfs_scrub_context *sc, + int whichfork, + xfs_scrub_da_btree_rec_fn scrub_fn, + void *private) +{ + struct xfs_scrub_da_btree ds = {}; + struct xfs_mount *mp = sc->mp; + struct xfs_da_state_blk *blks; + struct xfs_da_node_entry *key; + void *rec; + xfs_dablk_t blkno; + int level; + int error; + + /* Skip short format data structures; no btree to scan. */ + if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE) + return 0; + + /* Set up initial da state. */ + ds.dargs.dp = sc->ip; + ds.dargs.whichfork = whichfork; + ds.dargs.trans = sc->tp; + ds.dargs.op_flags = XFS_DA_OP_OKNOENT; + ds.state = xfs_da_state_alloc(); + ds.state->args = &ds.dargs; + ds.state->mp = mp; + ds.sc = sc; + ds.private = private; + if (whichfork == XFS_ATTR_FORK) { + ds.dargs.geo = mp->m_attr_geo; + ds.lowest = 0; + ds.highest = 0; + } else { + ds.dargs.geo = mp->m_dir_geo; + ds.lowest = ds.dargs.geo->leafblk; + ds.highest = ds.dargs.geo->freeblk; + } + blkno = ds.lowest; + level = 0; + + /* Find the root of the da tree, if present. */ + blks = ds.state->path.blk; + error = xfs_scrub_da_btree_block(&ds, level, blkno); + if (error) + goto out_state; + /* + * We didn't find a block at ds.lowest, which means that there's + * no LEAF1/LEAFN tree (at least not where it's supposed to be), + * so jump out now. + */ + if (blks[level].bp == NULL) + goto out_state; + + blks[level].index = 0; + while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { + /* Handle leaf block. */ + if (blks[level].magic != XFS_DA_NODE_MAGIC) { + /* End of leaf, pop back towards the root. */ + if (blks[level].index >= ds.maxrecs[level]) { + if (level > 0) + blks[level - 1].index++; + ds.tree_level++; + level--; + continue; + } + + /* Dispatch record scrubbing. */ + rec = xfs_scrub_da_btree_entry(&ds, level, + blks[level].index); + error = scrub_fn(&ds, level, rec); + if (error) + break; + if (xfs_scrub_should_terminate(sc, &error) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + break; + + blks[level].index++; + continue; + } + + + /* End of node, pop back towards the root. */ + if (blks[level].index >= ds.maxrecs[level]) { + if (level > 0) + blks[level - 1].index++; + ds.tree_level++; + level--; + continue; + } + + /* Hashes in order for scrub? */ + key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index); + error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval); + if (error) + goto out; + + /* Drill another level deeper. */ + blkno = be32_to_cpu(key->before); + level++; + ds.tree_level--; + error = xfs_scrub_da_btree_block(&ds, level, blkno); + if (error) + goto out; + if (blks[level].bp == NULL) + goto out; + + blks[level].index = 0; + } + +out: + /* Release all the buffers we're tracking. */ + for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { + if (blks[level].bp == NULL) + continue; + xfs_trans_brelse(sc->tp, blks[level].bp); + blks[level].bp = NULL; + } + +out_state: + xfs_da_state_free(ds.state); + return error; +} diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h new file mode 100644 index 000000000000..d31468d68cef --- /dev/null +++ b/fs/xfs/scrub/dabtree.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_SCRUB_DABTREE_H__ +#define __XFS_SCRUB_DABTREE_H__ + +/* dir/attr btree */ + +struct xfs_scrub_da_btree { + struct xfs_da_args dargs; + xfs_dahash_t hashes[XFS_DA_NODE_MAXDEPTH]; + int maxrecs[XFS_DA_NODE_MAXDEPTH]; + struct xfs_da_state *state; + struct xfs_scrub_context *sc; + void *private; + + /* + * Lowest and highest directory block address in which we expect + * to find dir/attr btree node blocks. For a directory this + * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for + * attributes there is no limit. + */ + xfs_dablk_t lowest; + xfs_dablk_t highest; + + int tree_level; +}; + +typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds, + int level, void *rec); + +/* Check for da btree operation errors. */ +bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error); + +/* Check for da btree corruption. */ +void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level); + +int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level, + __be32 *hashp); +int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork, + xfs_scrub_da_btree_rec_fn scrub_fn, void *private); + +#endif /* __XFS_SCRUB_DABTREE_H__ */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c new file mode 100644 index 000000000000..38f29806eb54 --- /dev/null +++ b/fs/xfs/scrub/dir.c @@ -0,0 +1,842 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_itable.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ialloc.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/dabtree.h" + +/* Set us up to scrub directories. */ +int +xfs_scrub_setup_directory( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_inode_contents(sc, ip, 0); +} + +/* Directories */ + +/* Scrub a directory entry. */ + +struct xfs_scrub_dir_ctx { + /* VFS fill-directory iterator */ + struct dir_context dir_iter; + + struct xfs_scrub_context *sc; +}; + +/* Check that an inode's mode matches a given DT_ type. */ +STATIC int +xfs_scrub_dir_check_ftype( + struct xfs_scrub_dir_ctx *sdc, + xfs_fileoff_t offset, + xfs_ino_t inum, + int dtype) +{ + struct xfs_mount *mp = sdc->sc->mp; + struct xfs_inode *ip; + int ino_dtype; + int error = 0; + + if (!xfs_sb_version_hasftype(&mp->m_sb)) { + if (dtype != DT_UNKNOWN && dtype != DT_DIR) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + offset); + goto out; + } + + /* + * Grab the inode pointed to by the dirent. We release the + * inode before we cancel the scrub transaction. Since we're + * don't know a priori that releasing the inode won't trigger + * eofblocks cleanup (which allocates what would be a nested + * transaction), we can't use DONTCACHE here because DONTCACHE + * inodes can trigger immediate inactive cleanup of the inode. + */ + error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); + if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, + &error)) + goto out; + + /* Convert mode to the DT_* values that dir_emit uses. */ + ino_dtype = xfs_dir3_get_dtype(mp, + xfs_mode_to_ftype(VFS_I(ip)->i_mode)); + if (ino_dtype != dtype) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + iput(VFS_I(ip)); +out: + return error; +} + +/* + * Scrub a single directory entry. + * + * We use the VFS directory iterator (i.e. readdir) to call this + * function for every directory entry in a directory. Once we're here, + * we check the inode number to make sure it's sane, then we check that + * we can look up this filename. Finally, we check the ftype. + */ +STATIC int +xfs_scrub_dir_actor( + struct dir_context *dir_iter, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) +{ + struct xfs_mount *mp; + struct xfs_inode *ip; + struct xfs_scrub_dir_ctx *sdc; + struct xfs_name xname; + xfs_ino_t lookup_ino; + xfs_dablk_t offset; + int error = 0; + + sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter); + ip = sdc->sc->ip; + mp = ip->i_mount; + offset = xfs_dir2_db_to_da(mp->m_dir_geo, + xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos)); + + /* Does this inode number make sense? */ + if (!xfs_verify_dir_ino(mp, ino)) { + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + goto out; + } + + if (!strncmp(".", name, namelen)) { + /* If this is "." then check that the inum matches the dir. */ + if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + offset); + if (ino != ip->i_ino) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + offset); + } else if (!strncmp("..", name, namelen)) { + /* + * If this is ".." in the root inode, check that the inum + * matches this dir. + */ + if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + offset); + if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + offset); + } + + /* Verify that we can look up this name by hash. */ + xname.name = name; + xname.len = namelen; + xname.type = XFS_DIR3_FT_UNKNOWN; + + error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL); + if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, + &error)) + goto fail_xref; + if (lookup_ino != ino) { + xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + goto out; + } + + /* Verify the file type. This function absorbs error codes. */ + error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type); + if (error) + goto out; +out: + return error; +fail_xref: + return error; +} + +/* Scrub a directory btree record. */ +STATIC int +xfs_scrub_dir_rec( + struct xfs_scrub_da_btree *ds, + int level, + void *rec) +{ + struct xfs_mount *mp = ds->state->mp; + struct xfs_dir2_leaf_entry *ent = rec; + struct xfs_inode *dp = ds->dargs.dp; + struct xfs_dir2_data_entry *dent; + struct xfs_buf *bp; + char *p, *endp; + xfs_ino_t ino; + xfs_dablk_t rec_bno; + xfs_dir2_db_t db; + xfs_dir2_data_aoff_t off; + xfs_dir2_dataptr_t ptr; + xfs_dahash_t calc_hash; + xfs_dahash_t hash; + unsigned int tag; + int error; + + /* Check the hash of the entry. */ + error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval); + if (error) + goto out; + + /* Valid hash pointer? */ + ptr = be32_to_cpu(ent->address); + if (ptr == 0) + return 0; + + /* Find the directory entry's location. */ + db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr); + off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr); + rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db); + + if (rec_bno >= mp->m_dir_geo->leafblk) { + xfs_scrub_da_set_corrupt(ds, level); + goto out; + } + error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp); + if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno, + &error)) + goto out; + if (!bp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out; + } + xfs_scrub_buffer_recheck(ds->sc, bp); + + dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); + + /* Make sure we got a real directory entry. */ + p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); + endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); + if (!endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + while (p < endp) { + struct xfs_dir2_data_entry *dep; + struct xfs_dir2_data_unused *dup; + + dup = (struct xfs_dir2_data_unused *)p; + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + p += be16_to_cpu(dup->length); + continue; + } + dep = (struct xfs_dir2_data_entry *)p; + if (dep == dent) + break; + p += mp->m_dir_inode_ops->data_entsize(dep->namelen); + } + if (p >= endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + + /* Retrieve the entry, sanity check it, and compare hashes. */ + ino = be64_to_cpu(dent->inumber); + hash = be32_to_cpu(ent->hashval); + tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); + if (!xfs_verify_dir_ino(mp, ino) || tag != off) + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + if (dent->namelen == 0) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + calc_hash = xfs_da_hashname(dent->name, dent->namelen); + if (calc_hash != hash) + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + +out_relse: + xfs_trans_brelse(ds->dargs.trans, bp); +out: + return error; +} + +/* + * Is this unused entry either in the bestfree or smaller than all of + * them? We've already checked that the bestfrees are sorted longest to + * shortest, and that there aren't any bogus entries. + */ +STATIC void +xfs_scrub_directory_check_free_entry( + struct xfs_scrub_context *sc, + xfs_dablk_t lblk, + struct xfs_dir2_data_free *bf, + struct xfs_dir2_data_unused *dup) +{ + struct xfs_dir2_data_free *dfp; + unsigned int dup_length; + + dup_length = be16_to_cpu(dup->length); + + /* Unused entry is shorter than any of the bestfrees */ + if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) + return; + + for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--) + if (dup_length == be16_to_cpu(dfp->length)) + return; + + /* Unused entry should be in the bestfrees but wasn't found. */ + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); +} + +/* Check free space info in a directory data block. */ +STATIC int +xfs_scrub_directory_data_bestfree( + struct xfs_scrub_context *sc, + xfs_dablk_t lblk, + bool is_block) +{ + struct xfs_dir2_data_unused *dup; + struct xfs_dir2_data_free *dfp; + struct xfs_buf *bp; + struct xfs_dir2_data_free *bf; + struct xfs_mount *mp = sc->mp; + const struct xfs_dir_ops *d_ops; + char *ptr; + char *endptr; + u16 tag; + unsigned int nr_bestfrees = 0; + unsigned int nr_frees = 0; + unsigned int smallest_bestfree; + int newlen; + int offset; + int error; + + d_ops = sc->ip->d_ops; + + if (is_block) { + /* dir block format */ + if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + error = xfs_dir3_block_read(sc->tp, sc->ip, &bp); + } else { + /* dir data format */ + error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp); + } + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + goto out; + xfs_scrub_buffer_recheck(sc, bp); + + /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ + + /* Do the bestfrees correspond to actual free space? */ + bf = d_ops->data_bestfree_p(bp->b_addr); + smallest_bestfree = UINT_MAX; + for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { + offset = be16_to_cpu(dfp->offset); + if (offset == 0) + continue; + if (offset >= mp->m_dir_geo->blksize) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out_buf; + } + dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset); + tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); + + /* bestfree doesn't match the entry it points at? */ + if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) || + be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) || + tag != ((char *)dup - (char *)bp->b_addr)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out_buf; + } + + /* bestfree records should be ordered largest to smallest */ + if (smallest_bestfree < be16_to_cpu(dfp->length)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out_buf; + } + + smallest_bestfree = be16_to_cpu(dfp->length); + nr_bestfrees++; + } + + /* Make sure the bestfrees are actually the best free spaces. */ + ptr = (char *)d_ops->data_entry_p(bp->b_addr); + endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); + + /* Iterate the entries, stopping when we hit or go past the end. */ + while (ptr < endptr) { + dup = (struct xfs_dir2_data_unused *)ptr; + /* Skip real entries */ + if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) { + struct xfs_dir2_data_entry *dep; + + dep = (struct xfs_dir2_data_entry *)ptr; + newlen = d_ops->data_entsize(dep->namelen); + if (newlen <= 0) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + lblk); + goto out_buf; + } + ptr += newlen; + continue; + } + + /* Spot check this free entry */ + tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); + if (tag != ((char *)dup - (char *)bp->b_addr)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + + /* + * Either this entry is a bestfree or it's smaller than + * any of the bestfrees. + */ + xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup); + + /* Move on. */ + newlen = be16_to_cpu(dup->length); + if (newlen <= 0) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out_buf; + } + ptr += newlen; + if (ptr <= endptr) + nr_frees++; + } + + /* We're required to fill all the space. */ + if (ptr != endptr) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + + /* Did we see at least as many free slots as there are bestfrees? */ + if (nr_frees < nr_bestfrees) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); +out_buf: + xfs_trans_brelse(sc->tp, bp); +out: + return error; +} + +/* + * Does the free space length in the free space index block ($len) match + * the longest length in the directory data block's bestfree array? + * Assume that we've already checked that the data block's bestfree + * array is in order. + */ +STATIC void +xfs_scrub_directory_check_freesp( + struct xfs_scrub_context *sc, + xfs_dablk_t lblk, + struct xfs_buf *dbp, + unsigned int len) +{ + struct xfs_dir2_data_free *dfp; + + dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr); + + if (len != be16_to_cpu(dfp->length)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + + if (len > 0 && be16_to_cpu(dfp->offset) == 0) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); +} + +/* Check free space info in a directory leaf1 block. */ +STATIC int +xfs_scrub_directory_leaf1_bestfree( + struct xfs_scrub_context *sc, + struct xfs_da_args *args, + xfs_dablk_t lblk) +{ + struct xfs_dir3_icleaf_hdr leafhdr; + struct xfs_dir2_leaf_entry *ents; + struct xfs_dir2_leaf_tail *ltp; + struct xfs_dir2_leaf *leaf; + struct xfs_buf *dbp; + struct xfs_buf *bp; + const struct xfs_dir_ops *d_ops = sc->ip->d_ops; + struct xfs_da_geometry *geo = sc->mp->m_dir_geo; + __be16 *bestp; + __u16 best; + __u32 hash; + __u32 lasthash = 0; + __u32 bestcount; + unsigned int stale = 0; + int i; + int error; + + /* Read the free space block. */ + error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + goto out; + xfs_scrub_buffer_recheck(sc, bp); + + leaf = bp->b_addr; + d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = d_ops->leaf_ents_p(leaf); + ltp = xfs_dir2_leaf_tail_p(geo, leaf); + bestcount = be32_to_cpu(ltp->bestcount); + bestp = xfs_dir2_leaf_bests_p(ltp); + + if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { + struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + + if (hdr3->pad != cpu_to_be32(0)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + } + + /* + * There should be as many bestfree slots as there are dir data + * blocks that can fit under i_size. + */ + if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + + /* Is the leaf count even remotely sane? */ + if (leafhdr.count > d_ops->leaf_max_ents(geo)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + + /* Leaves and bests don't overlap in leaf format. */ + if ((char *)&ents[leafhdr.count] > (char *)bestp) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + + /* Check hash value order, count stale entries. */ + for (i = 0; i < leafhdr.count; i++) { + hash = be32_to_cpu(ents[i].hashval); + if (i > 0 && lasthash > hash) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + lasthash = hash; + if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) + stale++; + } + if (leafhdr.stale != stale) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + + /* Check all the bestfree entries. */ + for (i = 0; i < bestcount; i++, bestp++) { + best = be16_to_cpu(*bestp); + if (best == NULLDATAOFF) + continue; + error = xfs_dir3_data_read(sc->tp, sc->ip, + i * args->geo->fsbcount, -1, &dbp); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, + &error)) + continue; + xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); + xfs_trans_brelse(sc->tp, dbp); + } +out: + return error; +} + +/* Check free space info in a directory freespace block. */ +STATIC int +xfs_scrub_directory_free_bestfree( + struct xfs_scrub_context *sc, + struct xfs_da_args *args, + xfs_dablk_t lblk) +{ + struct xfs_dir3_icfree_hdr freehdr; + struct xfs_buf *dbp; + struct xfs_buf *bp; + __be16 *bestp; + __u16 best; + unsigned int stale = 0; + int i; + int error; + + /* Read the free space block */ + error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + goto out; + xfs_scrub_buffer_recheck(sc, bp); + + if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; + + if (hdr3->pad != cpu_to_be32(0)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + } + + /* Check all the entries. */ + sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr); + bestp = sc->ip->d_ops->free_bests_p(bp->b_addr); + for (i = 0; i < freehdr.nvalid; i++, bestp++) { + best = be16_to_cpu(*bestp); + if (best == NULLDATAOFF) { + stale++; + continue; + } + error = xfs_dir3_data_read(sc->tp, sc->ip, + (freehdr.firstdb + i) * args->geo->fsbcount, + -1, &dbp); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, + &error)) + continue; + xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); + xfs_trans_brelse(sc->tp, dbp); + } + + if (freehdr.nused + stale != freehdr.nvalid) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); +out: + return error; +} + +/* Check free space information in directories. */ +STATIC int +xfs_scrub_directory_blocks( + struct xfs_scrub_context *sc) +{ + struct xfs_bmbt_irec got; + struct xfs_da_args args; + struct xfs_ifork *ifp; + struct xfs_mount *mp = sc->mp; + xfs_fileoff_t leaf_lblk; + xfs_fileoff_t free_lblk; + xfs_fileoff_t lblk; + struct xfs_iext_cursor icur; + xfs_dablk_t dabno; + bool found; + int is_block = 0; + int error; + + /* Ignore local format directories. */ + if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && + sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE) + return 0; + + ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); + lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET); + leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET); + free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET); + + /* Is this a block dir? */ + args.dp = sc->ip; + args.geo = mp->m_dir_geo; + args.trans = sc->tp; + error = xfs_dir2_isblock(&args, &is_block); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + goto out; + + /* Iterate all the data extents in the directory... */ + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); + while (found) { + /* Block directories only have a single block at offset 0. */ + if (is_block && + (got.br_startoff > 0 || + got.br_blockcount != args.geo->fsbcount)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + got.br_startoff); + break; + } + + /* No more data blocks... */ + if (got.br_startoff >= leaf_lblk) + break; + + /* + * Check each data block's bestfree data. + * + * Iterate all the fsbcount-aligned block offsets in + * this directory. The directory block reading code is + * smart enough to do its own bmap lookups to handle + * discontiguous directory blocks. When we're done + * with the extent record, re-query the bmap at the + * next fsbcount-aligned offset to avoid redundant + * block checks. + */ + for (lblk = roundup((xfs_dablk_t)got.br_startoff, + args.geo->fsbcount); + lblk < got.br_startoff + got.br_blockcount; + lblk += args.geo->fsbcount) { + error = xfs_scrub_directory_data_bestfree(sc, lblk, + is_block); + if (error) + goto out; + } + dabno = got.br_startoff + got.br_blockcount; + lblk = roundup(dabno, args.geo->fsbcount); + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); + } + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Look for a leaf1 block, which has free info. */ + if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) && + got.br_startoff == leaf_lblk && + got.br_blockcount == args.geo->fsbcount && + !xfs_iext_next_extent(ifp, &icur, &got)) { + if (is_block) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + error = xfs_scrub_directory_leaf1_bestfree(sc, &args, + leaf_lblk); + if (error) + goto out; + } + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Scan for free blocks */ + lblk = free_lblk; + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); + while (found) { + /* + * Dirs can't have blocks mapped above 2^32. + * Single-block dirs shouldn't even be here. + */ + lblk = got.br_startoff; + if (lblk & ~0xFFFFFFFFULL) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + if (is_block) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + goto out; + } + + /* + * Check each dir free block's bestfree data. + * + * Iterate all the fsbcount-aligned block offsets in + * this directory. The directory block reading code is + * smart enough to do its own bmap lookups to handle + * discontiguous directory blocks. When we're done + * with the extent record, re-query the bmap at the + * next fsbcount-aligned offset to avoid redundant + * block checks. + */ + for (lblk = roundup((xfs_dablk_t)got.br_startoff, + args.geo->fsbcount); + lblk < got.br_startoff + got.br_blockcount; + lblk += args.geo->fsbcount) { + error = xfs_scrub_directory_free_bestfree(sc, &args, + lblk); + if (error) + goto out; + } + dabno = got.br_startoff + got.br_blockcount; + lblk = roundup(dabno, args.geo->fsbcount); + found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got); + } +out: + return error; +} + +/* Scrub a whole directory. */ +int +xfs_scrub_directory( + struct xfs_scrub_context *sc) +{ + struct xfs_scrub_dir_ctx sdc = { + .dir_iter.actor = xfs_scrub_dir_actor, + .dir_iter.pos = 0, + .sc = sc, + }; + size_t bufsize; + loff_t oldpos; + int error = 0; + + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) + return -ENOENT; + + /* Plausible size? */ + if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) { + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + goto out; + } + + /* Check directory tree structure */ + error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL); + if (error) + return error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return error; + + /* Check the freespace. */ + error = xfs_scrub_directory_blocks(sc); + if (error) + return error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return error; + + /* + * Check that every dirent we see can also be looked up by hash. + * Userspace usually asks for a 32k buffer, so we will too. + */ + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, + sc->ip->i_d.di_size); + + /* + * Look up every name in this directory by hash. + * + * Use the xfs_readdir function to call xfs_scrub_dir_actor on + * every directory entry in this directory. In _actor, we check + * the name, inode number, and ftype (if applicable) of the + * entry. xfs_readdir uses the VFS filldir functions to provide + * iteration context. + * + * The VFS grabs a read or write lock via i_rwsem before it reads + * or writes to a directory. If we've gotten this far we've + * already obtained IOLOCK_EXCL, which (since 4.10) is the same as + * getting a write lock on i_rwsem. Therefore, it is safe for us + * to drop the ILOCK here in order to reuse the _readdir and + * _dir_lookup routines, which do their own ILOCK locking. + */ + oldpos = 0; + sc->ilock_flags &= ~XFS_ILOCK_EXCL; + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); + while (true) { + error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, + &error)) + goto out; + if (oldpos == sdc.dir_iter.pos) + break; + oldpos = sdc.dir_iter.pos; + } + +out: + return error; +} diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c new file mode 100644 index 000000000000..106ca4bd753f --- /dev/null +++ b/fs/xfs/scrub/ialloc.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_icache.h" +#include "xfs_rmap.h" +#include "xfs_log.h" +#include "xfs_trans_priv.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* + * Set us up to scrub inode btrees. + * If we detect a discrepancy between the inobt and the inode, + * try again after forcing logged inode cores out to disk. + */ +int +xfs_scrub_setup_ag_iallocbt( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder); +} + +/* Inode btree scrubber. */ + +/* + * If we're checking the finobt, cross-reference with the inobt. + * Otherwise we're checking the inobt; if there is an finobt, make sure + * we have a record or not depending on freecount. + */ +static inline void +xfs_scrub_iallocbt_chunk_xref_other( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino) +{ + struct xfs_btree_cur **pcur; + bool has_irec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) + pcur = &sc->sa.ino_cur; + else + pcur = &sc->sa.fino_cur; + if (!(*pcur)) + return; + error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (((irec->ir_freecount > 0 && !has_irec) || + (irec->ir_freecount == 0 && has_irec))) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_iallocbt_chunk_xref( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_owner_info oinfo; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, len); +} + +/* Is this chunk worth checking? */ +STATIC bool +xfs_scrub_iallocbt_chunk( + struct xfs_scrub_btree *bs, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino, + xfs_extlen_t len) +{ + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agblock_t bno; + + bno = XFS_AGINO_TO_AGBNO(mp, agino); + if (bno + len <= bno || + !xfs_verify_agbno(mp, agno, bno) || + !xfs_verify_agbno(mp, agno, bno + len - 1)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); + + return true; +} + +/* Count the number of free inodes. */ +static unsigned int +xfs_scrub_iallocbt_freecount( + xfs_inofree_t freemask) +{ + BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64)); + return hweight64(freemask); +} + +/* Check a particular inode with ir_free. */ +STATIC int +xfs_scrub_iallocbt_check_cluster_freemask( + struct xfs_scrub_btree *bs, + xfs_ino_t fsino, + xfs_agino_t chunkino, + xfs_agino_t clusterino, + struct xfs_inobt_rec_incore *irec, + struct xfs_buf *bp) +{ + struct xfs_dinode *dip; + struct xfs_mount *mp = bs->cur->bc_mp; + bool inode_is_free = false; + bool freemask_ok; + bool inuse; + int error = 0; + + if (xfs_scrub_should_terminate(bs->sc, &error)) + return error; + + dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize); + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || + (dip->di_version >= 3 && + be64_to_cpu(dip->di_ino) != fsino + clusterino)) { + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + goto out; + } + + if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino)) + inode_is_free = true; + error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, + fsino + clusterino, &inuse); + if (error == -ENODATA) { + /* Not cached, just read the disk buffer */ + freemask_ok = inode_is_free ^ !!(dip->di_mode); + if (!bs->sc->try_harder && !freemask_ok) + return -EDEADLOCK; + } else if (error < 0) { + /* + * Inode is only half assembled, or there was an IO error, + * or the verifier failed, so don't bother trying to check. + * The inode scrubber can deal with this. + */ + goto out; + } else { + /* Inode is all there. */ + freemask_ok = inode_is_free ^ inuse; + } + if (!freemask_ok) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); +out: + return 0; +} + +/* Make sure the free mask is consistent with what the inodes think. */ +STATIC int +xfs_scrub_iallocbt_check_freemask( + struct xfs_scrub_btree *bs, + struct xfs_inobt_rec_incore *irec) +{ + struct xfs_owner_info oinfo; + struct xfs_imap imap; + struct xfs_mount *mp = bs->cur->bc_mp; + struct xfs_dinode *dip; + struct xfs_buf *bp; + xfs_ino_t fsino; + xfs_agino_t nr_inodes; + xfs_agino_t agino; + xfs_agino_t chunkino; + xfs_agino_t clusterino; + xfs_agblock_t agbno; + int blks_per_cluster; + uint16_t holemask; + uint16_t ir_holemask; + int error = 0; + + /* Make sure the freemask matches the inode records. */ + blks_per_cluster = xfs_icluster_size_fsb(mp); + nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + + for (agino = irec->ir_startino; + agino < irec->ir_startino + XFS_INODES_PER_CHUNK; + agino += blks_per_cluster * mp->m_sb.sb_inopblock) { + fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino); + chunkino = agino - irec->ir_startino; + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + + /* Compute the holemask mask for this cluster. */ + for (clusterino = 0, holemask = 0; clusterino < nr_inodes; + clusterino += XFS_INODES_PER_HOLEMASK_BIT) + holemask |= XFS_INOBT_MASK((chunkino + clusterino) / + XFS_INODES_PER_HOLEMASK_BIT); + + /* The whole cluster must be a hole or not a hole. */ + ir_holemask = (irec->ir_holemask & holemask); + if (ir_holemask != holemask && ir_holemask != 0) { + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + continue; + } + + /* If any part of this is a hole, skip it. */ + if (ir_holemask) { + xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, + blks_per_cluster, &oinfo); + continue; + } + + xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, + &oinfo); + + /* Grab the inode cluster buffer. */ + imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, + agbno); + imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap.im_boffset = 0; + + error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, + &dip, &bp, 0, 0); + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0, + &error)) + continue; + + /* Which inodes are free? */ + for (clusterino = 0; clusterino < nr_inodes; clusterino++) { + error = xfs_scrub_iallocbt_check_cluster_freemask(bs, + fsino, chunkino, clusterino, irec, bp); + if (error) { + xfs_trans_brelse(bs->cur->bc_tp, bp); + return error; + } + } + + xfs_trans_brelse(bs->cur->bc_tp, bp); + } + + return error; +} + +/* Scrub an inobt/finobt record. */ +STATIC int +xfs_scrub_iallocbt_rec( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec) +{ + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_filblks_t *inode_blocks = bs->private; + struct xfs_inobt_rec_incore irec; + uint64_t holes; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agino_t agino; + xfs_agblock_t agbno; + xfs_extlen_t len; + int holecount; + int i; + int error = 0; + unsigned int real_freecount; + uint16_t holemask; + + xfs_inobt_btrec_to_irec(mp, rec, &irec); + + if (irec.ir_count > XFS_INODES_PER_CHUNK || + irec.ir_freecount > XFS_INODES_PER_CHUNK) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + real_freecount = irec.ir_freecount + + (XFS_INODES_PER_CHUNK - irec.ir_count); + if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + agino = irec.ir_startino; + /* Record has to be properly aligned within the AG. */ + if (!xfs_verify_agino(mp, agno, agino) || + !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) { + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + goto out; + } + + /* Make sure this record is aligned to cluster and inoalignmnt size. */ + agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino); + if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || + (agbno & (xfs_icluster_size_fsb(mp) - 1))) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + *inode_blocks += XFS_B_TO_FSB(mp, + irec.ir_count * mp->m_sb.sb_inodesize); + + /* Handle non-sparse inodes */ + if (!xfs_inobt_issparse(irec.ir_holemask)) { + len = XFS_B_TO_FSB(mp, + XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize); + if (irec.ir_count != XFS_INODES_PER_CHUNK) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) + goto out; + goto check_freemask; + } + + /* Check each chunk of a sparse inode cluster. */ + holemask = irec.ir_holemask; + holecount = 0; + len = XFS_B_TO_FSB(mp, + XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize); + holes = ~xfs_inobt_irec_to_allocmask(&irec); + if ((holes & irec.ir_free) != holes || + irec.ir_freecount > irec.ir_count) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) { + if (holemask & 1) + holecount += XFS_INODES_PER_HOLEMASK_BIT; + else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) + break; + holemask >>= 1; + agino += XFS_INODES_PER_HOLEMASK_BIT; + } + + if (holecount > XFS_INODES_PER_CHUNK || + holecount + irec.ir_count != XFS_INODES_PER_CHUNK) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + +check_freemask: + error = xfs_scrub_iallocbt_check_freemask(bs, &irec); + if (error) + goto out; + +out: + return error; +} + +/* + * Make sure the inode btrees are as large as the rmap thinks they are. + * Don't bother if we're missing btree cursors, as we're already corrupt. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_btreeblks( + struct xfs_scrub_context *sc, + int which) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + xfs_extlen_t inobt_blocks = 0; + xfs_extlen_t finobt_blocks = 0; + int error; + + if (!sc->sa.ino_cur || !sc->sa.rmap_cur || + (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur)) + return; + + /* Check that we saw as many inobt blocks as the rmap says. */ + error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); + if (!xfs_scrub_process_error(sc, 0, 0, &error)) + return; + + if (sc->sa.fino_cur) { + error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); + if (!xfs_scrub_process_error(sc, 0, 0, &error)) + return; + } + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inobt_blocks + finobt_blocks) + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); +} + +/* + * Make sure that the inobt records point to the same number of blocks as + * the rmap says are owned by inodes. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_inodes( + struct xfs_scrub_context *sc, + int which, + xfs_filblks_t inode_blocks) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many inode blocks as the rmap knows about. */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inode_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + +/* Scrub the inode btrees for some AG. */ +STATIC int +xfs_scrub_iallocbt( + struct xfs_scrub_context *sc, + xfs_btnum_t which) +{ + struct xfs_btree_cur *cur; + struct xfs_owner_info oinfo; + xfs_filblks_t inode_blocks = 0; + int error; + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); + cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; + error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, + &inode_blocks); + if (error) + return error; + + xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); + + /* + * If we're scrubbing the inode btree, inode_blocks is the number of + * blocks pointed to by all the inode chunk records. Therefore, we + * should compare to the number of inode chunk blocks that the rmap + * knows about. We can't do this for the finobt since it only points + * to inode chunks with free inodes. + */ + if (which == XFS_BTNUM_INO) + xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); + + return error; +} + +int +xfs_scrub_inobt( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO); +} + +int +xfs_scrub_finobt( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); +} + +/* See if an inode btree has (or doesn't have) an inode chunk record. */ +static inline void +xfs_scrub_xref_inode_check( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + struct xfs_btree_cur **icur, + bool should_have_inodes) +{ + bool has_inodes; + int error; + + if (!(*icur)) + return; + + error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); + if (!xfs_scrub_should_check_xref(sc, &error, icur)) + return; + if (has_inodes != should_have_inodes) + xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); +} + +/* xref check that the extent is not covered by inodes */ +void +xfs_scrub_xref_is_not_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); +} + +/* xref check that the extent is covered by inodes */ +void +xfs_scrub_xref_is_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); +} diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c new file mode 100644 index 000000000000..df14930e4fc5 --- /dev/null +++ b/fs/xfs/scrub/inode.c @@ -0,0 +1,611 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_inode_buf.h" +#include "xfs_inode_fork.h" +#include "xfs_ialloc.h" +#include "xfs_da_format.h" +#include "xfs_reflink.h" +#include "xfs_rmap.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* + * Grab total control of the inode metadata. It doesn't matter here if + * the file data is still changing; exclusive access to the metadata is + * the goal. + */ +int +xfs_scrub_setup_inode( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = sc->mp; + int error; + + /* + * Try to get the inode. If the verifiers fail, we try again + * in raw mode. + */ + error = xfs_scrub_get_inode(sc, ip); + switch (error) { + case 0: + break; + case -EFSCORRUPTED: + case -EFSBADCRC: + return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); + default: + return error; + } + + /* Got the inode, lock it and we're ready to go. */ + sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + xfs_ilock(sc->ip, sc->ilock_flags); + error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); + if (error) + goto out; + sc->ilock_flags |= XFS_ILOCK_EXCL; + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); + +out: + /* scrub teardown will unlock and release the inode for us */ + return error; +} + +/* Inode core */ + +/* Validate di_extsize hint. */ +STATIC void +xfs_scrub_inode_extsize( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags) +{ + xfs_failaddr_t fa; + + fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), + mode, flags); + if (fa) + xfs_scrub_ino_set_corrupt(sc, ino); +} + +/* + * Validate di_cowextsize hint. + * + * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). + * These functions must be kept in sync with each other. + */ +STATIC void +xfs_scrub_inode_cowextsize( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + xfs_failaddr_t fa; + + fa = xfs_inode_validate_cowextsize(sc->mp, + be32_to_cpu(dip->di_cowextsize), mode, flags, + flags2); + if (fa) + xfs_scrub_ino_set_corrupt(sc, ino); +} + +/* Make sure the di_flags make sense for the inode. */ +STATIC void +xfs_scrub_inode_flags( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags) +{ + struct xfs_mount *mp = sc->mp; + + if (flags & ~XFS_DIFLAG_ANY) + goto bad; + + /* rt flags require rt device */ + if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && + !mp->m_rtdev_targp) + goto bad; + + /* new rt bitmap flag only valid for rbmino */ + if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) + goto bad; + + /* directory-only flags */ + if ((flags & (XFS_DIFLAG_RTINHERIT | + XFS_DIFLAG_EXTSZINHERIT | + XFS_DIFLAG_PROJINHERIT | + XFS_DIFLAG_NOSYMLINKS)) && + !S_ISDIR(mode)) + goto bad; + + /* file-only flags */ + if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && + !S_ISREG(mode)) + goto bad; + + /* filestreams and rt make no sense */ + if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) + goto bad; + + return; +bad: + xfs_scrub_ino_set_corrupt(sc, ino); +} + +/* Make sure the di_flags2 make sense for the inode. */ +STATIC void +xfs_scrub_inode_flags2( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + struct xfs_mount *mp = sc->mp; + + if (flags2 & ~XFS_DIFLAG2_ANY) + goto bad; + + /* reflink flag requires reflink feature */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && + !xfs_sb_version_hasreflink(&mp->m_sb)) + goto bad; + + /* cowextsize flag is checked w.r.t. mode separately */ + + /* file/dir-only flags */ + if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) + goto bad; + + /* file-only flags */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) + goto bad; + + /* realtime and reflink make no sense, currently */ + if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) + goto bad; + + /* dax and reflink make no sense, currently */ + if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) + goto bad; + + return; +bad: + xfs_scrub_ino_set_corrupt(sc, ino); +} + +/* Scrub all the ondisk inode fields. */ +STATIC void +xfs_scrub_dinode( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip, + xfs_ino_t ino) +{ + struct xfs_mount *mp = sc->mp; + size_t fork_recs; + unsigned long long isize; + uint64_t flags2; + uint32_t nextents; + uint16_t flags; + uint16_t mode; + + flags = be16_to_cpu(dip->di_flags); + if (dip->di_version >= 3) + flags2 = be64_to_cpu(dip->di_flags2); + else + flags2 = 0; + + /* di_mode */ + mode = be16_to_cpu(dip->di_mode); + switch (mode & S_IFMT) { + case S_IFLNK: + case S_IFREG: + case S_IFDIR: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + /* mode is recognized */ + break; + default: + xfs_scrub_ino_set_corrupt(sc, ino); + break; + } + + /* v1/v2 fields */ + switch (dip->di_version) { + case 1: + /* + * We autoconvert v1 inodes into v2 inodes on writeout, + * so just mark this inode for preening. + */ + xfs_scrub_ino_set_preen(sc, ino); + break; + case 2: + case 3: + if (dip->di_onlink != 0) + xfs_scrub_ino_set_corrupt(sc, ino); + + if (dip->di_mode == 0 && sc->ip) + xfs_scrub_ino_set_corrupt(sc, ino); + + if (dip->di_projid_hi != 0 && + !xfs_sb_version_hasprojid32bit(&mp->m_sb)) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + default: + xfs_scrub_ino_set_corrupt(sc, ino); + return; + } + + /* + * di_uid/di_gid -- -1 isn't invalid, but there's no way that + * userspace could have created that. + */ + if (dip->di_uid == cpu_to_be32(-1U) || + dip->di_gid == cpu_to_be32(-1U)) + xfs_scrub_ino_set_warning(sc, ino); + + /* di_format */ + switch (dip->di_format) { + case XFS_DINODE_FMT_DEV: + if (!S_ISCHR(mode) && !S_ISBLK(mode) && + !S_ISFIFO(mode) && !S_ISSOCK(mode)) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_LOCAL: + if (!S_ISDIR(mode) && !S_ISLNK(mode)) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_EXTENTS: + if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_BTREE: + if (!S_ISREG(mode) && !S_ISDIR(mode)) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_UUID: + default: + xfs_scrub_ino_set_corrupt(sc, ino); + break; + } + + /* di_[amc]time.nsec */ + if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino); + if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino); + if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* + * di_size. xfs_dinode_verify checks for things that screw up + * the VFS such as the upper bit being set and zero-length + * symlinks/directories, but we can do more here. + */ + isize = be64_to_cpu(dip->di_size); + if (isize & (1ULL << 63)) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* Devices, fifos, and sockets must have zero size */ + if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* Directories can't be larger than the data section size (32G) */ + if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* Symlinks can't be larger than SYMLINK_MAXLEN */ + if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* + * Warn if the running kernel can't handle the kinds of offsets + * needed to deal with the file size. In other words, if the + * pagecache can't cache all the blocks in this file due to + * overly large offsets, flag the inode for admin review. + */ + if (isize >= mp->m_super->s_maxbytes) + xfs_scrub_ino_set_warning(sc, ino); + + /* di_nblocks */ + if (flags2 & XFS_DIFLAG2_REFLINK) { + ; /* nblocks can exceed dblocks */ + } else if (flags & XFS_DIFLAG_REALTIME) { + /* + * nblocks is the sum of data extents (in the rtdev), + * attr extents (in the datadev), and both forks' bmbt + * blocks (in the datadev). This clumsy check is the + * best we can do without cross-referencing with the + * inode forks. + */ + if (be64_to_cpu(dip->di_nblocks) >= + mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) + xfs_scrub_ino_set_corrupt(sc, ino); + } else { + if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) + xfs_scrub_ino_set_corrupt(sc, ino); + } + + xfs_scrub_inode_flags(sc, dip, ino, mode, flags); + + xfs_scrub_inode_extsize(sc, dip, ino, mode, flags); + + /* di_nextents */ + nextents = be32_to_cpu(dip->di_nextents); + fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); + switch (dip->di_format) { + case XFS_DINODE_FMT_EXTENTS: + if (nextents > fork_recs) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_BTREE: + if (nextents <= fork_recs) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + default: + if (nextents != 0) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + } + + /* di_forkoff */ + if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) + xfs_scrub_ino_set_corrupt(sc, ino); + if (dip->di_anextents != 0 && dip->di_forkoff == 0) + xfs_scrub_ino_set_corrupt(sc, ino); + if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* di_aformat */ + if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && + dip->di_aformat != XFS_DINODE_FMT_EXTENTS && + dip->di_aformat != XFS_DINODE_FMT_BTREE) + xfs_scrub_ino_set_corrupt(sc, ino); + + /* di_anextents */ + nextents = be16_to_cpu(dip->di_anextents); + fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); + switch (dip->di_aformat) { + case XFS_DINODE_FMT_EXTENTS: + if (nextents > fork_recs) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + case XFS_DINODE_FMT_BTREE: + if (nextents <= fork_recs) + xfs_scrub_ino_set_corrupt(sc, ino); + break; + default: + if (nextents != 0) + xfs_scrub_ino_set_corrupt(sc, ino); + } + + if (dip->di_version >= 3) { + if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino); + xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2); + xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags, + flags2); + } +} + +/* + * Make sure the finobt doesn't think this inode is free. + * We don't have to check the inobt ourselves because we got the inode via + * IGET_UNTRUSTED, which checks the inobt for us. + */ +static void +xfs_scrub_inode_xref_finobt( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + struct xfs_inobt_rec_incore rec; + xfs_agino_t agino; + int has_record; + int error; + + if (!sc->sa.fino_cur) + return; + + agino = XFS_INO_TO_AGINO(sc->mp, ino); + + /* + * Try to get the finobt record. If we can't get it, then we're + * in good shape. + */ + error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, + &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + /* + * Otherwise, make sure this record either doesn't cover this inode, + * or that it does but it's marked present. + */ + if (rec.ir_startino > agino || + rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) + return; + + if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); +} + +/* Cross reference the inode fields with the forks. */ +STATIC void +xfs_scrub_inode_xref_bmap( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip) +{ + xfs_extnum_t nextents; + xfs_filblks_t count; + xfs_filblks_t acount; + int error; + + /* Walk all the extents to check nextents/naextents/nblocks. */ + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, + &nextents, &count); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents < be32_to_cpu(dip->di_nextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); + + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, + &nextents, &acount); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents != be16_to_cpu(dip->di_anextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); + + /* Check nblocks against the inode. */ + if (count + acount != be64_to_cpu(dip->di_nblocks)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_inode_xref( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_dinode *dip) +{ + struct xfs_owner_info oinfo; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + agno = XFS_INO_TO_AGNO(sc->mp, ino); + agbno = XFS_INO_TO_AGBNO(sc->mp, ino); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_inode_xref_finobt(sc, ino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_inode_xref_bmap(sc, dip); + + xfs_scrub_ag_free(sc, &sc->sa); +} + +/* + * If the reflink iflag disagrees with a scan for shared data fork extents, + * either flag an error (shared extents w/ no flag) or a preen (flag set w/o + * any shared extents). We already checked for reflink iflag set on a non + * reflink filesystem. + */ +static void +xfs_scrub_inode_check_reflink_iflag( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + struct xfs_mount *mp = sc->mp; + bool has_shared; + int error; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return; + + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, + &has_shared); + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), + XFS_INO_TO_AGBNO(mp, ino), &error)) + return; + if (xfs_is_reflink_inode(sc->ip) && !has_shared) + xfs_scrub_ino_set_preen(sc, ino); + else if (!xfs_is_reflink_inode(sc->ip) && has_shared) + xfs_scrub_ino_set_corrupt(sc, ino); +} + +/* Scrub an inode. */ +int +xfs_scrub_inode( + struct xfs_scrub_context *sc) +{ + struct xfs_dinode di; + int error = 0; + + /* + * If sc->ip is NULL, that means that the setup function called + * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED + * and a NULL inode, so flag the corruption error and return. + */ + if (!sc->ip) { + xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino); + return 0; + } + + /* Scrub the inode core. */ + xfs_inode_to_disk(sc->ip, &di, 0); + xfs_scrub_dinode(sc, &di, sc->ip->i_ino); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* + * Look for discrepancies between file's data blocks and the reflink + * iflag. We already checked the iflag against the file mode when + * we scrubbed the dinode. + */ + if (S_ISREG(VFS_I(sc->ip)->i_mode)) + xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino); + + xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di); +out: + return error; +} diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c new file mode 100644 index 000000000000..1fb88c18d455 --- /dev/null +++ b/fs/xfs/scrub/parent.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ialloc.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" + +/* Set us up to scrub parents. */ +int +xfs_scrub_setup_parent( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_inode_contents(sc, ip, 0); +} + +/* Parent pointers */ + +/* Look for an entry in a parent pointing to this inode. */ + +struct xfs_scrub_parent_ctx { + struct dir_context dc; + xfs_ino_t ino; + xfs_nlink_t nlink; +}; + +/* Look for a single entry in a directory pointing to an inode. */ +STATIC int +xfs_scrub_parent_actor( + struct dir_context *dc, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) +{ + struct xfs_scrub_parent_ctx *spc; + + spc = container_of(dc, struct xfs_scrub_parent_ctx, dc); + if (spc->ino == ino) + spc->nlink++; + return 0; +} + +/* Count the number of dentries in the parent dir that point to this inode. */ +STATIC int +xfs_scrub_parent_count_parent_dentries( + struct xfs_scrub_context *sc, + struct xfs_inode *parent, + xfs_nlink_t *nlink) +{ + struct xfs_scrub_parent_ctx spc = { + .dc.actor = xfs_scrub_parent_actor, + .dc.pos = 0, + .ino = sc->ip->i_ino, + .nlink = 0, + }; + size_t bufsize; + loff_t oldpos; + uint lock_mode; + int error = 0; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. This is + * how we guarantee that the parent's extent map has been loaded, + * if there is one. + */ + lock_mode = xfs_ilock_data_map_shared(parent); + if (parent->i_d.di_nextents > 0) + error = xfs_dir3_data_readahead(parent, 0, -1); + xfs_iunlock(parent, lock_mode); + if (error) + return error; + + /* + * Iterate the parent dir to confirm that there is + * exactly one entry pointing back to the inode being + * scanned. + */ + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, + parent->i_d.di_size); + oldpos = 0; + while (true) { + error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); + if (error) + goto out; + if (oldpos == spc.dc.pos) + break; + oldpos = spc.dc.pos; + } + *nlink = spc.nlink; +out: + return error; +} + +/* + * Given the inode number of the alleged parent of the inode being + * scrubbed, try to validate that the parent has exactly one directory + * entry pointing back to the inode being scrubbed. + */ +STATIC int +xfs_scrub_parent_validate( + struct xfs_scrub_context *sc, + xfs_ino_t dnum, + bool *try_again) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_inode *dp = NULL; + xfs_nlink_t expected_nlink; + xfs_nlink_t nlink; + int error = 0; + + *try_again = false; + + /* '..' must not point to ourselves. */ + if (sc->ip->i_ino == dnum) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + /* + * If we're an unlinked directory, the parent /won't/ have a link + * to us. Otherwise, it should have one link. + */ + expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; + + /* + * Grab this parent inode. We release the inode before we + * cancel the scrub transaction. Since we're don't know a + * priori that releasing the inode won't trigger eofblocks + * cleanup (which allocates what would be a nested transaction) + * if the parent pointer erroneously points to a file, we + * can't use DONTCACHE here because DONTCACHE inodes can trigger + * immediate inactive cleanup of the inode. + * + * If _iget returns -EINVAL then the parent inode number is garbage + * and the directory is corrupt. If the _iget returns -EFSCORRUPTED + * or -EFSBADCRC then the parent is corrupt which is a cross + * referencing error. Any other error is an operational error. + */ + error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); + if (error == -EINVAL) { + error = -EFSCORRUPTED; + xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); + goto out; + } + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out; + if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out_rele; + } + + /* + * We prefer to keep the inode locked while we lock and search + * its alleged parent for a forward reference. If we can grab + * the iolock, validate the pointers and we're done. We must + * use nowait here to avoid an ABBA deadlock on the parent and + * the child inodes. + */ + if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { + error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, + &error)) + goto out_unlock; + if (nlink != expected_nlink) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out_unlock; + } + + /* + * The game changes if we get here. We failed to lock the parent, + * so we're going to try to verify both pointers while only holding + * one lock so as to avoid deadlocking with something that's actually + * trying to traverse down the directory tree. + */ + xfs_iunlock(sc->ip, sc->ilock_flags); + sc->ilock_flags = 0; + xfs_ilock(dp, XFS_IOLOCK_SHARED); + + /* Go looking for our dentry. */ + error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out_unlock; + + /* Drop the parent lock, relock this inode. */ + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + sc->ilock_flags = XFS_IOLOCK_EXCL; + xfs_ilock(sc->ip, sc->ilock_flags); + + /* + * If we're an unlinked directory, the parent /won't/ have a link + * to us. Otherwise, it should have one link. We have to re-set + * it here because we dropped the lock on sc->ip. + */ + expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; + + /* Look up '..' to see if the inode changed. */ + error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out_rele; + + /* Drat, parent changed. Try again! */ + if (dnum != dp->i_ino) { + iput(VFS_I(dp)); + *try_again = true; + return 0; + } + iput(VFS_I(dp)); + + /* + * '..' didn't change, so check that there was only one entry + * for us in the parent. + */ + if (nlink != expected_nlink) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return error; + +out_unlock: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); +out_rele: + iput(VFS_I(dp)); +out: + return error; +} + +/* Scrub a parent pointer. */ +int +xfs_scrub_parent( + struct xfs_scrub_context *sc) +{ + struct xfs_mount *mp = sc->mp; + xfs_ino_t dnum; + bool try_again; + int tries = 0; + int error = 0; + + /* + * If we're a directory, check that the '..' link points up to + * a directory that has one entry pointing to us. + */ + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) + return -ENOENT; + + /* We're not a special inode, are we? */ + if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + /* + * The VFS grabs a read or write lock via i_rwsem before it reads + * or writes to a directory. If we've gotten this far we've + * already obtained IOLOCK_EXCL, which (since 4.10) is the same as + * getting a write lock on i_rwsem. Therefore, it is safe for us + * to drop the ILOCK here in order to do directory lookups. + */ + sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); + + /* Look up '..' */ + error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out; + if (!xfs_verify_dir_ino(mp, dnum)) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + /* Is this the root dir? Then '..' must point to itself. */ + if (sc->ip == mp->m_rootip) { + if (sc->ip->i_ino != mp->m_sb.sb_rootino || + sc->ip->i_ino != dnum) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + do { + error = xfs_scrub_parent_validate(sc, dnum, &try_again); + if (error) + goto out; + } while (try_again && ++tries < 20); + + /* + * We gave it our best shot but failed, so mark this scrub + * incomplete. Userspace can decide if it wants to try again. + */ + if (try_again && tries == 20) + xfs_scrub_set_incomplete(sc); +out: + return error; +} diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c new file mode 100644 index 000000000000..6ba465e6c885 --- /dev/null +++ b/fs/xfs/scrub/quota.c @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_dquot.h" +#include "xfs_dquot_item.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" + +/* Convert a scrub type code to a DQ flag, or return 0 if error. */ +static inline uint +xfs_scrub_quota_to_dqtype( + struct xfs_scrub_context *sc) +{ + switch (sc->sm->sm_type) { + case XFS_SCRUB_TYPE_UQUOTA: + return XFS_DQ_USER; + case XFS_SCRUB_TYPE_GQUOTA: + return XFS_DQ_GROUP; + case XFS_SCRUB_TYPE_PQUOTA: + return XFS_DQ_PROJ; + default: + return 0; + } +} + +/* Set us up to scrub a quota. */ +int +xfs_scrub_setup_quota( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + uint dqtype; + + dqtype = xfs_scrub_quota_to_dqtype(sc); + if (dqtype == 0) + return -EINVAL; + if (!xfs_this_quota_on(sc->mp, dqtype)) + return -ENOENT; + return 0; +} + +/* Quotas. */ + +/* Scrub the fields in an individual quota item. */ +STATIC void +xfs_scrub_quota_item( + struct xfs_scrub_context *sc, + uint dqtype, + struct xfs_dquot *dq, + xfs_dqid_t id) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_disk_dquot *d = &dq->q_core; + struct xfs_quotainfo *qi = mp->m_quotainfo; + xfs_fileoff_t offset; + unsigned long long bsoft; + unsigned long long isoft; + unsigned long long rsoft; + unsigned long long bhard; + unsigned long long ihard; + unsigned long long rhard; + unsigned long long bcount; + unsigned long long icount; + unsigned long long rcount; + xfs_ino_t fs_icount; + + offset = id / qi->qi_dqperchunk; + + /* + * We fed $id and DQNEXT into the xfs_qm_dqget call, which means + * that the actual dquot we got must either have the same id or + * the next higher id. + */ + if (id > be32_to_cpu(d->d_id)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* Did we get the dquot type we wanted? */ + if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* Check the limits. */ + bhard = be64_to_cpu(d->d_blk_hardlimit); + ihard = be64_to_cpu(d->d_ino_hardlimit); + rhard = be64_to_cpu(d->d_rtb_hardlimit); + + bsoft = be64_to_cpu(d->d_blk_softlimit); + isoft = be64_to_cpu(d->d_ino_softlimit); + rsoft = be64_to_cpu(d->d_rtb_softlimit); + + /* + * Warn if the hard limits are larger than the fs. + * Administrators can do this, though in production this seems + * suspect, which is why we flag it for review. + * + * Complain about corruption if the soft limit is greater than + * the hard limit. + */ + if (bhard > mp->m_sb.sb_dblocks) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (bsoft > bhard) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + if (ihard > mp->m_maxicount) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (isoft > ihard) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + if (rhard > mp->m_sb.sb_rblocks) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (rsoft > rhard) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* Check the resource counts. */ + bcount = be64_to_cpu(d->d_bcount); + icount = be64_to_cpu(d->d_icount); + rcount = be64_to_cpu(d->d_rtbcount); + fs_icount = percpu_counter_sum(&mp->m_icount); + + /* + * Check that usage doesn't exceed physical limits. However, on + * a reflink filesystem we're allowed to exceed physical space + * if there are no quota limits. + */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (mp->m_sb.sb_dblocks < bcount) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, + offset); + } else { + if (mp->m_sb.sb_dblocks < bcount) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + offset); + } + if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* + * We can violate the hard limits if the admin suddenly sets a + * lower limit than the actual usage. However, we flag it for + * admin review. + */ + if (id != 0 && bhard != 0 && bcount > bhard) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (id != 0 && ihard != 0 && icount > ihard) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + if (id != 0 && rhard != 0 && rcount > rhard) + xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); +} + +/* Scrub all of a quota type's items. */ +int +xfs_scrub_quota( + struct xfs_scrub_context *sc) +{ + struct xfs_bmbt_irec irec = { 0 }; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip; + struct xfs_quotainfo *qi = mp->m_quotainfo; + struct xfs_dquot *dq; + xfs_fileoff_t max_dqid_off; + xfs_fileoff_t off = 0; + xfs_dqid_t id = 0; + uint dqtype; + int nimaps; + int error = 0; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return -ENOENT; + + mutex_lock(&qi->qi_quotaofflock); + dqtype = xfs_scrub_quota_to_dqtype(sc); + if (!xfs_this_quota_on(sc->mp, dqtype)) { + error = -ENOENT; + goto out_unlock_quota; + } + + /* Attach to the quota inode and set sc->ip so that reporting works. */ + ip = xfs_quota_inode(sc->mp, dqtype); + sc->ip = ip; + + /* Look for problem extents. */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { + xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + goto out_unlock_inode; + } + max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk; + while (1) { + if (xfs_scrub_should_terminate(sc, &error)) + break; + + off = irec.br_startoff + irec.br_blockcount; + nimaps = 1; + error = xfs_bmapi_read(ip, off, -1, &irec, &nimaps, + XFS_BMAPI_ENTIRE); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, off, + &error)) + goto out_unlock_inode; + if (!nimaps) + break; + if (irec.br_startblock == HOLESTARTBLOCK) + continue; + + /* Check the extent record doesn't point to crap. */ + if (irec.br_startblock + irec.br_blockcount <= + irec.br_startblock) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + irec.br_startoff); + if (!xfs_verify_fsbno(mp, irec.br_startblock) || + !xfs_verify_fsbno(mp, irec.br_startblock + + irec.br_blockcount - 1)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + irec.br_startoff); + + /* + * Unwritten extents or blocks mapped above the highest + * quota id shouldn't happen. + */ + if (isnullstartblock(irec.br_startblock) || + irec.br_startoff > max_dqid_off || + irec.br_startoff + irec.br_blockcount > max_dqid_off + 1) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, off); + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + + /* Check all the quota items. */ + while (id < ((xfs_dqid_t)-1ULL)) { + if (xfs_scrub_should_terminate(sc, &error)) + break; + + error = xfs_qm_dqget(mp, NULL, id, dqtype, XFS_QMOPT_DQNEXT, + &dq); + if (error == -ENOENT) + break; + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, + id * qi->qi_dqperchunk, &error)) + break; + + xfs_scrub_quota_item(sc, dqtype, dq, id); + + id = be32_to_cpu(dq->q_core.d_id) + 1; + xfs_qm_dqput(dq); + if (!id) + break; + } + +out: + /* We set sc->ip earlier, so make sure we clear it now. */ + sc->ip = NULL; +out_unlock_quota: + mutex_unlock(&qi->qi_quotaofflock); + return error; + +out_unlock_inode: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + goto out; +} diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c new file mode 100644 index 000000000000..400f1561cd3d --- /dev/null +++ b/fs/xfs/scrub/refcount.c @@ -0,0 +1,515 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_alloc.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* + * Set us up to scrub reference count btrees. + */ +int +xfs_scrub_setup_ag_refcountbt( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_ag_btree(sc, ip, false); +} + +/* Reference count btree scrubber. */ + +/* + * Confirming Reference Counts via Reverse Mappings + * + * We want to count the reverse mappings overlapping a refcount record + * (bno, len, refcount), allowing for the possibility that some of the + * overlap may come from smaller adjoining reverse mappings, while some + * comes from single extents which overlap the range entirely. The + * outer loop is as follows: + * + * 1. For all reverse mappings overlapping the refcount extent, + * a. If a given rmap completely overlaps, mark it as seen. + * b. Otherwise, record the fragment (in agbno order) for later + * processing. + * + * Once we've seen all the rmaps, we know that for all blocks in the + * refcount record we want to find $refcount owners and we've already + * visited $seen extents that overlap all the blocks. Therefore, we + * need to find ($refcount - $seen) owners for every block in the + * extent; call that quantity $target_nr. Proceed as follows: + * + * 2. Pull the first $target_nr fragments from the list; all of them + * should start at or before the start of the extent. + * Call this subset of fragments the working set. + * 3. Until there are no more unprocessed fragments, + * a. Find the shortest fragments in the set and remove them. + * b. Note the block number of the end of these fragments. + * c. Pull the same number of fragments from the list. All of these + * fragments should start at the block number recorded in the + * previous step. + * d. Put those fragments in the set. + * 4. Check that there are $target_nr fragments remaining in the list, + * and that they all end at or beyond the end of the refcount extent. + * + * If the refcount is correct, all the check conditions in the algorithm + * should always hold true. If not, the refcount is incorrect. + */ +struct xfs_scrub_refcnt_frag { + struct list_head list; + struct xfs_rmap_irec rm; +}; + +struct xfs_scrub_refcnt_check { + struct xfs_scrub_context *sc; + struct list_head fragments; + + /* refcount extent we're examining */ + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; + + /* number of owners seen */ + xfs_nlink_t seen; +}; + +/* + * Decide if the given rmap is large enough that we can redeem it + * towards refcount verification now, or if it's a fragment, in + * which case we'll hang onto it in the hopes that we'll later + * discover that we've collected exactly the correct number of + * fragments as the refcountbt says we should have. + */ +STATIC int +xfs_scrub_refcountbt_rmap_check( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_refcnt_check *refchk = priv; + struct xfs_scrub_refcnt_frag *frag; + xfs_agblock_t rm_last; + xfs_agblock_t rc_last; + int error = 0; + + if (xfs_scrub_should_terminate(refchk->sc, &error)) + return error; + + rm_last = rec->rm_startblock + rec->rm_blockcount - 1; + rc_last = refchk->bno + refchk->len - 1; + + /* Confirm that a single-owner refc extent is a CoW stage. */ + if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { + xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); + return 0; + } + + if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { + /* + * The rmap overlaps the refcount record, so we can confirm + * one refcount owner seen. + */ + refchk->seen++; + } else { + /* + * This rmap covers only part of the refcount record, so + * save the fragment for later processing. If the rmapbt + * is healthy each rmap_irec we see will be in agbno order + * so we don't need insertion sort here. + */ + frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), + KM_MAYFAIL | KM_NOFS); + if (!frag) + return -ENOMEM; + memcpy(&frag->rm, rec, sizeof(frag->rm)); + list_add_tail(&frag->list, &refchk->fragments); + } + + return 0; +} + +/* + * Given a bunch of rmap fragments, iterate through them, keeping + * a running tally of the refcount. If this ever deviates from + * what we expect (which is the refcountbt's refcount minus the + * number of extents that totally covered the refcountbt extent), + * we have a refcountbt error. + */ +STATIC void +xfs_scrub_refcountbt_process_rmap_fragments( + struct xfs_scrub_refcnt_check *refchk) +{ + struct list_head worklist; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + xfs_agblock_t bno; + xfs_agblock_t rbno; + xfs_agblock_t next_rbno; + xfs_nlink_t nr; + xfs_nlink_t target_nr; + + target_nr = refchk->refcount - refchk->seen; + if (target_nr == 0) + return; + + /* + * There are (refchk->rc.rc_refcount - refchk->nr refcount) + * references we haven't found yet. Pull that many off the + * fragment list and figure out where the smallest rmap ends + * (and therefore the next rmap should start). All the rmaps + * we pull off should start at or before the beginning of the + * refcount record's range. + */ + INIT_LIST_HEAD(&worklist); + rbno = NULLAGBLOCK; + nr = 1; + + /* Make sure the fragments actually /are/ in agbno order. */ + bno = 0; + list_for_each_entry(frag, &refchk->fragments, list) { + if (frag->rm.rm_startblock < bno) + goto done; + bno = frag->rm.rm_startblock; + } + + /* + * Find all the rmaps that start at or before the refc extent, + * and put them on the worklist. + */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + if (frag->rm.rm_startblock > refchk->bno) + goto done; + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno < rbno) + rbno = bno; + list_move_tail(&frag->list, &worklist); + if (nr == target_nr) + break; + nr++; + } + + /* + * We should have found exactly $target_nr rmap fragments starting + * at or before the refcount extent. + */ + if (nr != target_nr) + goto done; + + while (!list_empty(&refchk->fragments)) { + /* Discard any fragments ending at rbno from the worklist. */ + nr = 0; + next_rbno = NULLAGBLOCK; + list_for_each_entry_safe(frag, n, &worklist, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno != rbno) { + if (bno < next_rbno) + next_rbno = bno; + continue; + } + list_del(&frag->list); + kmem_free(frag); + nr++; + } + + /* Try to add nr rmaps starting at rbno to the worklist. */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (frag->rm.rm_startblock != rbno) + goto done; + list_move_tail(&frag->list, &worklist); + if (next_rbno > bno) + next_rbno = bno; + nr--; + if (nr == 0) + break; + } + + /* + * If we get here and nr > 0, this means that we added fewer + * items to the worklist than we discarded because the fragment + * list ran out of items. Therefore, we cannot maintain the + * required refcount. Something is wrong, so we're done. + */ + if (nr) + goto done; + + rbno = next_rbno; + } + + /* + * Make sure the last extent we processed ends at or beyond + * the end of the refcount extent. + */ + if (rbno < refchk->bno + refchk->len) + goto done; + + /* Actually record us having seen the remaining refcount. */ + refchk->seen = refchk->refcount; +done: + /* Delete fragments and work list. */ + list_for_each_entry_safe(frag, n, &worklist, list) { + list_del(&frag->list); + kmem_free(frag); + } + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + +/* Use the rmap entries covering this extent to verify the refcount. */ +STATIC void +xfs_scrub_refcountbt_xref_rmap( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + struct xfs_scrub_refcnt_check refchk = { + .sc = sc, + .bno = bno, + .len = len, + .refcount = refcount, + .seen = 0, + }; + struct xfs_rmap_irec low; + struct xfs_rmap_irec high; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Cross-reference with the rmapbt to confirm the refcount. */ + memset(&low, 0, sizeof(low)); + low.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rm_startblock = bno + len - 1; + + INIT_LIST_HEAD(&refchk.fragments); + error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, + &xfs_scrub_refcountbt_rmap_check, &refchk); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + goto out_free; + + xfs_scrub_refcountbt_process_rmap_fragments(&refchk); + if (refcount != refchk.seen) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + +out_free: + list_for_each_entry_safe(frag, n, &refchk.fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_refcountbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); +} + +/* Scrub a refcountbt record. */ +STATIC int +xfs_scrub_refcountbt_rec( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec) +{ + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agblock_t *cow_blocks = bs->private; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; + bool has_cowflag; + int error = 0; + + bno = be32_to_cpu(rec->refc.rc_startblock); + len = be32_to_cpu(rec->refc.rc_blockcount); + refcount = be32_to_cpu(rec->refc.rc_refcount); + + /* Only CoW records can have refcount == 1. */ + has_cowflag = (bno & XFS_REFC_COW_START); + if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + if (has_cowflag) + (*cow_blocks) += len; + + /* Check the extent. */ + bno &= ~XFS_REFC_COW_START; + if (bno + len <= bno || + !xfs_verify_agbno(mp, agno, bno) || + !xfs_verify_agbno(mp, agno, bno + len - 1)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (refcount == 0) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); + + return error; +} + +/* Make sure we have as many refc blocks as the rmap says. */ +STATIC void +xfs_scrub_refcount_xref_rmap( + struct xfs_scrub_context *sc, + struct xfs_owner_info *oinfo, + xfs_filblks_t cow_blocks) +{ + xfs_extlen_t refcbt_blocks = 0; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many refcbt blocks as the rmap knows about. */ + error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); + if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) + return; + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != refcbt_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + + /* Check that we saw as many cow blocks as the rmap knows about. */ + xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != cow_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + +/* Scrub the refcount btree for some AG. */ +int +xfs_scrub_refcountbt( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + xfs_agblock_t cow_blocks = 0; + int error; + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); + error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, + &oinfo, &cow_blocks); + if (error) + return error; + + xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); + + return 0; +} + +/* xref check that a cow staging extent is marked in the refcountbt. */ +void +xfs_scrub_xref_is_cow_staging( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_refcount_irec rc; + bool has_cowflag; + int has_refcount; + int error; + + if (!sc->sa.refc_cur) + return; + + /* Find the CoW staging extent. */ + error = xfs_refcount_lookup_le(sc->sa.refc_cur, + agbno + XFS_REFC_COW_START, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + /* CoW flag must be set, refcount must be 1. */ + has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); + if (!has_cowflag || rc.rc_refcount != 1) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + + /* Must be at least as long as what was passed in */ + if (rc.rc_blockcount < len) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + +/* + * xref check that the extent is not shared. Only file data blocks + * can have multiple owners. + */ +void +xfs_scrub_xref_is_not_shared( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool shared; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (shared) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c new file mode 100644 index 000000000000..8f2a7c3ff455 --- /dev/null +++ b/fs/xfs/scrub/rmap.c @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" + +/* + * Set us up to scrub reverse mapping btrees. + */ +int +xfs_scrub_setup_ag_rmapbt( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + return xfs_scrub_setup_ag_btree(sc, ip, false); +} + +/* Reverse-mapping scrubber. */ + +/* Cross-reference a rmap against the refcount btree. */ +STATIC void +xfs_scrub_rmapbt_xref_refc( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + xfs_agblock_t fbno; + xfs_extlen_t flen; + bool non_inode; + bool is_bmbt; + bool is_attr; + bool is_unwritten; + int error; + + if (!sc->sa.refc_cur) + return; + + non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; + + /* If this is shared, must be a data fork extent. */ + error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, + irec->rm_blockcount, &fbno, &flen, false); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_rmapbt_xref( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + xfs_agblock_t agbno = irec->rm_startblock; + xfs_extlen_t len = irec->rm_blockcount; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_INODES) + xfs_scrub_xref_is_inode_chunk(sc, agbno, len); + else + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_COW) + xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, + irec->rm_blockcount); + else + xfs_scrub_rmapbt_xref_refc(sc, irec); +} + +/* Scrub an rmapbt record. */ +STATIC int +xfs_scrub_rmapbt_rec( + struct xfs_scrub_btree *bs, + union xfs_btree_rec *rec) +{ + struct xfs_mount *mp = bs->cur->bc_mp; + struct xfs_rmap_irec irec; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + bool non_inode; + bool is_unwritten; + bool is_bmbt; + bool is_attr; + int error; + + error = xfs_rmap_btrec_to_irec(rec, &irec); + if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error)) + goto out; + + /* Check extent. */ + if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (irec.rm_owner == XFS_RMAP_OWN_FS) { + /* + * xfs_verify_agbno returns false for static fs metadata. + * Since that only exists at the start of the AG, validate + * that by hand. + */ + if (irec.rm_startblock != 0 || + irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + } else { + /* + * Otherwise we must point somewhere past the static metadata + * but before the end of the FS. Run the regular check. + */ + if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) || + !xfs_verify_agbno(mp, agno, irec.rm_startblock + + irec.rm_blockcount - 1)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + } + + /* Check flags. */ + non_inode = XFS_RMAP_NON_INODE_OWNER(irec.rm_owner); + is_bmbt = irec.rm_flags & XFS_RMAP_BMBT_BLOCK; + is_attr = irec.rm_flags & XFS_RMAP_ATTR_FORK; + is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN; + + if (is_bmbt && irec.rm_offset != 0) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (non_inode && irec.rm_offset != 0) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (is_unwritten && (is_bmbt || non_inode || is_attr)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (non_inode && (is_bmbt || is_unwritten || is_attr)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (!non_inode) { + if (!xfs_verify_ino(mp, irec.rm_owner)) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + } else { + /* Non-inode owner within the magic values? */ + if (irec.rm_owner <= XFS_RMAP_OWN_MIN || + irec.rm_owner > XFS_RMAP_OWN_FS) + xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + } + + xfs_scrub_rmapbt_xref(bs->sc, &irec); +out: + return error; +} + +/* Scrub the rmap btree for some AG. */ +int +xfs_scrub_rmapbt( + struct xfs_scrub_context *sc) +{ + struct xfs_owner_info oinfo; + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, + &oinfo, NULL); +} + +/* xref check that the extent is owned by a given owner */ +static inline void +xfs_scrub_xref_check_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool should_have_rmap) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, + &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap != should_have_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + +/* xref check that the extent is owned by a given owner */ +void +xfs_scrub_xref_is_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); +} + +/* xref check that the extent is not owned by a given owner */ +void +xfs_scrub_xref_is_not_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); +} + +/* xref check that the extent has no reverse mapping at all */ +void +xfs_scrub_xref_has_no_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c new file mode 100644 index 000000000000..39c41dfe08ee --- /dev/null +++ b/fs/xfs/scrub/rtbitmap.c @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" + +/* Set us up with the realtime metadata locked. */ +int +xfs_scrub_setup_rt( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + int error; + + error = xfs_scrub_setup_fs(sc, ip); + if (error) + return error; + + sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; + sc->ip = sc->mp->m_rbmip; + xfs_ilock(sc->ip, sc->ilock_flags); + + return 0; +} + +/* Realtime bitmap. */ + +/* Scrub a free extent record from the realtime bitmap. */ +STATIC int +xfs_scrub_rtbitmap_rec( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_scrub_context *sc = priv; + + if (rec->ar_startblock + rec->ar_blockcount <= rec->ar_startblock || + !xfs_verify_rtbno(sc->mp, rec->ar_startblock) || + !xfs_verify_rtbno(sc->mp, rec->ar_startblock + + rec->ar_blockcount - 1)) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; +} + +/* Scrub the realtime bitmap. */ +int +xfs_scrub_rtbitmap( + struct xfs_scrub_context *sc) +{ + int error; + + error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out; + +out: + return error; +} + +/* Scrub the realtime summary. */ +int +xfs_scrub_rtsummary( + struct xfs_scrub_context *sc) +{ + /* XXX: implement this some day */ + return -ENOENT; +} + + +/* xref check that the extent is not free in the rtbitmap */ +void +xfs_scrub_xref_is_used_rt_space( + struct xfs_scrub_context *sc, + xfs_rtblock_t fsbno, + xfs_extlen_t len) +{ + bool is_free; + int error; + + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len, + &is_free); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + goto out_unlock; + if (is_free) + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino); +out_unlock: + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c new file mode 100644 index 000000000000..26c75967a072 --- /dev/null +++ b/fs/xfs/scrub/scrub.c @@ -0,0 +1,462 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_itable.h" +#include "xfs_alloc.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/btree.h" + +/* + * Online Scrub and Repair + * + * Traditionally, XFS (the kernel driver) did not know how to check or + * repair on-disk data structures. That task was left to the xfs_check + * and xfs_repair tools, both of which require taking the filesystem + * offline for a thorough but time consuming examination. Online + * scrub & repair, on the other hand, enables us to check the metadata + * for obvious errors while carefully stepping around the filesystem's + * ongoing operations, locking rules, etc. + * + * Given that most XFS metadata consist of records stored in a btree, + * most of the checking functions iterate the btree blocks themselves + * looking for irregularities. When a record block is encountered, each + * record can be checked for obviously bad values. Record values can + * also be cross-referenced against other btrees to look for potential + * misunderstandings between pieces of metadata. + * + * It is expected that the checkers responsible for per-AG metadata + * structures will lock the AG headers (AGI, AGF, AGFL), iterate the + * metadata structure, and perform any relevant cross-referencing before + * unlocking the AG and returning the results to userspace. These + * scrubbers must not keep an AG locked for too long to avoid tying up + * the block and inode allocators. + * + * Block maps and b-trees rooted in an inode present a special challenge + * because they can involve extents from any AG. The general scrubber + * structure of lock -> check -> xref -> unlock still holds, but AG + * locking order rules /must/ be obeyed to avoid deadlocks. The + * ordering rule, of course, is that we must lock in increasing AG + * order. Helper functions are provided to track which AG headers we've + * already locked. If we detect an imminent locking order violation, we + * can signal a potential deadlock, in which case the scrubber can jump + * out to the top level, lock all the AGs in order, and retry the scrub. + * + * For file data (directories, extended attributes, symlinks) scrub, we + * can simply lock the inode and walk the data. For btree data + * (directories and attributes) we follow the same btree-scrubbing + * strategy outlined previously to check the records. + * + * We use a bit of trickery with transactions to avoid buffer deadlocks + * if there is a cycle in the metadata. The basic problem is that + * travelling down a btree involves locking the current buffer at each + * tree level. If a pointer should somehow point back to a buffer that + * we've already examined, we will deadlock due to the second buffer + * locking attempt. Note however that grabbing a buffer in transaction + * context links the locked buffer to the transaction. If we try to + * re-grab the buffer in the context of the same transaction, we avoid + * the second lock attempt and continue. Between the verifier and the + * scrubber, something will notice that something is amiss and report + * the corruption. Therefore, each scrubber will allocate an empty + * transaction, attach buffers to it, and cancel the transaction at the + * end of the scrub run. Cancelling a non-dirty transaction simply + * unlocks the buffers. + * + * There are four pieces of data that scrub can communicate to + * userspace. The first is the error code (errno), which can be used to + * communicate operational errors in performing the scrub. There are + * also three flags that can be set in the scrub context. If the data + * structure itself is corrupt, the CORRUPT flag will be set. If + * the metadata is correct but otherwise suboptimal, the PREEN flag + * will be set. + * + * We perform secondary validation of filesystem metadata by + * cross-referencing every record with all other available metadata. + * For example, for block mapping extents, we verify that there are no + * records in the free space and inode btrees corresponding to that + * space extent and that there is a corresponding entry in the reverse + * mapping btree. Inconsistent metadata is noted by setting the + * XCORRUPT flag; btree query function errors are noted by setting the + * XFAIL flag and deleting the cursor to prevent further attempts to + * cross-reference with a defective btree. + */ + +/* + * Scrub probe -- userspace uses this to probe if we're willing to scrub + * or repair a given mountpoint. This will be used by xfs_scrub to + * probe the kernel's abilities to scrub (and repair) the metadata. We + * do this by validating the ioctl inputs from userspace, preparing the + * filesystem for a scrub (or a repair) operation, and immediately + * returning to userspace. Userspace can use the returned errno and + * structure state to decide (in broad terms) if scrub/repair are + * supported by the running kernel. + */ +static int +xfs_scrub_probe( + struct xfs_scrub_context *sc) +{ + int error = 0; + + if (xfs_scrub_should_terminate(sc, &error)) + return error; + + return 0; +} + +/* Scrub setup and teardown */ + +/* Free all the resources and finish the transactions. */ +STATIC int +xfs_scrub_teardown( + struct xfs_scrub_context *sc, + struct xfs_inode *ip_in, + int error) +{ + xfs_scrub_ag_free(sc, &sc->sa); + if (sc->tp) { + xfs_trans_cancel(sc->tp); + sc->tp = NULL; + } + if (sc->ip) { + if (sc->ilock_flags) + xfs_iunlock(sc->ip, sc->ilock_flags); + if (sc->ip != ip_in && + !xfs_internal_inum(sc->mp, sc->ip->i_ino)) + iput(VFS_I(sc->ip)); + sc->ip = NULL; + } + if (sc->buf) { + kmem_free(sc->buf); + sc->buf = NULL; + } + return error; +} + +/* Scrubbing dispatch. */ + +static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { + [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ + .type = ST_NONE, + .setup = xfs_scrub_setup_fs, + .scrub = xfs_scrub_probe, + }, + [XFS_SCRUB_TYPE_SB] = { /* superblock */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, + .scrub = xfs_scrub_superblock, + }, + [XFS_SCRUB_TYPE_AGF] = { /* agf */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, + .scrub = xfs_scrub_agf, + }, + [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, + .scrub = xfs_scrub_agfl, + }, + [XFS_SCRUB_TYPE_AGI] = { /* agi */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, + .scrub = xfs_scrub_agi, + }, + [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_allocbt, + .scrub = xfs_scrub_bnobt, + }, + [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_allocbt, + .scrub = xfs_scrub_cntbt, + }, + [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_iallocbt, + .scrub = xfs_scrub_inobt, + }, + [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_iallocbt, + .scrub = xfs_scrub_finobt, + .has = xfs_sb_version_hasfinobt, + }, + [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_rmapbt, + .scrub = xfs_scrub_rmapbt, + .has = xfs_sb_version_hasrmapbt, + }, + [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ + .type = ST_PERAG, + .setup = xfs_scrub_setup_ag_refcountbt, + .scrub = xfs_scrub_refcountbt, + .has = xfs_sb_version_hasreflink, + }, + [XFS_SCRUB_TYPE_INODE] = { /* inode record */ + .type = ST_INODE, + .setup = xfs_scrub_setup_inode, + .scrub = xfs_scrub_inode, + }, + [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ + .type = ST_INODE, + .setup = xfs_scrub_setup_inode_bmap, + .scrub = xfs_scrub_bmap_data, + }, + [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ + .type = ST_INODE, + .setup = xfs_scrub_setup_inode_bmap, + .scrub = xfs_scrub_bmap_attr, + }, + [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ + .type = ST_INODE, + .setup = xfs_scrub_setup_inode_bmap, + .scrub = xfs_scrub_bmap_cow, + }, + [XFS_SCRUB_TYPE_DIR] = { /* directory */ + .type = ST_INODE, + .setup = xfs_scrub_setup_directory, + .scrub = xfs_scrub_directory, + }, + [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ + .type = ST_INODE, + .setup = xfs_scrub_setup_xattr, + .scrub = xfs_scrub_xattr, + }, + [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ + .type = ST_INODE, + .setup = xfs_scrub_setup_symlink, + .scrub = xfs_scrub_symlink, + }, + [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ + .type = ST_INODE, + .setup = xfs_scrub_setup_parent, + .scrub = xfs_scrub_parent, + }, + [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ + .type = ST_FS, + .setup = xfs_scrub_setup_rt, + .scrub = xfs_scrub_rtbitmap, + .has = xfs_sb_version_hasrealtime, + }, + [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ + .type = ST_FS, + .setup = xfs_scrub_setup_rt, + .scrub = xfs_scrub_rtsummary, + .has = xfs_sb_version_hasrealtime, + }, + [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, + }, + [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, + }, + [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, + }, +}; + +/* This isn't a stable feature, warn once per day. */ +static inline void +xfs_scrub_experimental_warning( + struct xfs_mount *mp) +{ + static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( + "xfs_scrub_warning", 86400 * HZ, 1); + ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); + + if (__ratelimit(&scrub_warning)) + xfs_alert(mp, +"EXPERIMENTAL online scrub feature in use. Use at your own risk!"); +} + +static int +xfs_scrub_validate_inputs( + struct xfs_mount *mp, + struct xfs_scrub_metadata *sm) +{ + int error; + const struct xfs_scrub_meta_ops *ops; + + error = -EINVAL; + /* Check our inputs. */ + sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; + if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) + goto out; + /* sm_reserved[] must be zero */ + if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) + goto out; + + error = -ENOENT; + /* Do we know about this type of metadata? */ + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) + goto out; + ops = &meta_scrub_ops[sm->sm_type]; + if (ops->setup == NULL || ops->scrub == NULL) + goto out; + /* Does this fs even support this type of metadata? */ + if (ops->has && !ops->has(&mp->m_sb)) + goto out; + + error = -EINVAL; + /* restricting fields must be appropriate for type */ + switch (ops->type) { + case ST_NONE: + case ST_FS: + if (sm->sm_ino || sm->sm_gen || sm->sm_agno) + goto out; + break; + case ST_PERAG: + if (sm->sm_ino || sm->sm_gen || + sm->sm_agno >= mp->m_sb.sb_agcount) + goto out; + break; + case ST_INODE: + if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) + goto out; + break; + default: + goto out; + } + + error = -EOPNOTSUPP; + /* + * We won't scrub any filesystem that doesn't have the ability + * to record unwritten extents. The option was made default in + * 2003, removed from mkfs in 2007, and cannot be disabled in + * v5, so if we find a filesystem without this flag it's either + * really old or totally unsupported. Avoid it either way. + * We also don't support v1-v3 filesystems, which aren't + * mountable. + */ + if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) + goto out; + + /* We don't know how to repair anything yet. */ + if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) + goto out; + + error = 0; +out: + return error; +} + +/* Dispatch metadata scrubbing. */ +int +xfs_scrub_metadata( + struct xfs_inode *ip, + struct xfs_scrub_metadata *sm) +{ + struct xfs_scrub_context sc; + struct xfs_mount *mp = ip->i_mount; + bool try_harder = false; + int error = 0; + + BUILD_BUG_ON(sizeof(meta_scrub_ops) != + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); + + trace_xfs_scrub_start(ip, sm, error); + + /* Forbidden if we are shut down or mounted norecovery. */ + error = -ESHUTDOWN; + if (XFS_FORCED_SHUTDOWN(mp)) + goto out; + error = -ENOTRECOVERABLE; + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + goto out; + + error = xfs_scrub_validate_inputs(mp, sm); + if (error) + goto out; + + xfs_scrub_experimental_warning(mp); + +retry_op: + /* Set up for the operation. */ + memset(&sc, 0, sizeof(sc)); + sc.mp = ip->i_mount; + sc.sm = sm; + sc.ops = &meta_scrub_ops[sm->sm_type]; + sc.try_harder = try_harder; + sc.sa.agno = NULLAGNUMBER; + error = sc.ops->setup(&sc, ip); + if (error) + goto out_teardown; + + /* Scrub for errors. */ + error = sc.ops->scrub(&sc); + if (!try_harder && error == -EDEADLOCK) { + /* + * Scrubbers return -EDEADLOCK to mean 'try harder'. + * Tear down everything we hold, then set up again with + * preparation for worst-case scenarios. + */ + error = xfs_scrub_teardown(&sc, ip, 0); + if (error) + goto out; + try_harder = true; + goto retry_op; + } else if (error) + goto out_teardown; + + if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | + XFS_SCRUB_OFLAG_XCORRUPT)) + xfs_alert_ratelimited(mp, "Corruption detected during scrub."); + +out_teardown: + error = xfs_scrub_teardown(&sc, ip, error); +out: + trace_xfs_scrub_done(ip, sm, error); + if (error == -EFSCORRUPTED || error == -EFSBADCRC) { + sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + error = 0; + } + return error; +} diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h new file mode 100644 index 000000000000..0d92af86f67a --- /dev/null +++ b/fs/xfs/scrub/scrub.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_SCRUB_SCRUB_H__ +#define __XFS_SCRUB_SCRUB_H__ + +struct xfs_scrub_context; + +/* Type info and names for the scrub types. */ +enum xfs_scrub_type { + ST_NONE = 1, /* disabled */ + ST_PERAG, /* per-AG metadata */ + ST_FS, /* per-FS metadata */ + ST_INODE, /* per-inode metadata */ +}; + +struct xfs_scrub_meta_ops { + /* Acquire whatever resources are needed for the operation. */ + int (*setup)(struct xfs_scrub_context *, + struct xfs_inode *); + + /* Examine metadata for errors. */ + int (*scrub)(struct xfs_scrub_context *); + + /* Decide if we even have this piece of metadata. */ + bool (*has)(struct xfs_sb *); + + /* type describing required/allowed inputs */ + enum xfs_scrub_type type; +}; + +/* Buffer pointers and btree cursors for an entire AG. */ +struct xfs_scrub_ag { + xfs_agnumber_t agno; + + /* AG btree roots */ + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + struct xfs_buf *agi_bp; + + /* AG btrees */ + struct xfs_btree_cur *bno_cur; + struct xfs_btree_cur *cnt_cur; + struct xfs_btree_cur *ino_cur; + struct xfs_btree_cur *fino_cur; + struct xfs_btree_cur *rmap_cur; + struct xfs_btree_cur *refc_cur; +}; + +struct xfs_scrub_context { + /* General scrub state. */ + struct xfs_mount *mp; + struct xfs_scrub_metadata *sm; + const struct xfs_scrub_meta_ops *ops; + struct xfs_trans *tp; + struct xfs_inode *ip; + void *buf; + uint ilock_flags; + bool try_harder; + + /* State tracking for single-AG operations. */ + struct xfs_scrub_ag sa; +}; + +/* Metadata scrubbers */ +int xfs_scrub_tester(struct xfs_scrub_context *sc); +int xfs_scrub_superblock(struct xfs_scrub_context *sc); +int xfs_scrub_agf(struct xfs_scrub_context *sc); +int xfs_scrub_agfl(struct xfs_scrub_context *sc); +int xfs_scrub_agi(struct xfs_scrub_context *sc); +int xfs_scrub_bnobt(struct xfs_scrub_context *sc); +int xfs_scrub_cntbt(struct xfs_scrub_context *sc); +int xfs_scrub_inobt(struct xfs_scrub_context *sc); +int xfs_scrub_finobt(struct xfs_scrub_context *sc); +int xfs_scrub_rmapbt(struct xfs_scrub_context *sc); +int xfs_scrub_refcountbt(struct xfs_scrub_context *sc); +int xfs_scrub_inode(struct xfs_scrub_context *sc); +int xfs_scrub_bmap_data(struct xfs_scrub_context *sc); +int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc); +int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc); +int xfs_scrub_directory(struct xfs_scrub_context *sc); +int xfs_scrub_xattr(struct xfs_scrub_context *sc); +int xfs_scrub_symlink(struct xfs_scrub_context *sc); +int xfs_scrub_parent(struct xfs_scrub_context *sc); +#ifdef CONFIG_XFS_RT +int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc); +int xfs_scrub_rtsummary(struct xfs_scrub_context *sc); +#else +static inline int +xfs_scrub_rtbitmap(struct xfs_scrub_context *sc) +{ + return -ENOENT; +} +static inline int +xfs_scrub_rtsummary(struct xfs_scrub_context *sc) +{ + return -ENOENT; +} +#endif +#ifdef CONFIG_XFS_QUOTA +int xfs_scrub_quota(struct xfs_scrub_context *sc); +#else +static inline int +xfs_scrub_quota(struct xfs_scrub_context *sc) +{ + return -ENOENT; +} +#endif + +/* cross-referencing helpers */ +void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); +#ifdef CONFIG_XFS_RT +void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, + xfs_rtblock_t rtbno, xfs_extlen_t len); +#else +# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) +#endif + +#endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c new file mode 100644 index 000000000000..3aa3d60f7c16 --- /dev/null +++ b/fs/xfs/scrub/symlink.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_symlink.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" + +/* Set us up to scrub a symbolic link. */ +int +xfs_scrub_setup_symlink( + struct xfs_scrub_context *sc, + struct xfs_inode *ip) +{ + /* Allocate the buffer without the inode lock held. */ + sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP); + if (!sc->buf) + return -ENOMEM; + + return xfs_scrub_setup_inode_contents(sc, ip, 0); +} + +/* Symbolic links. */ + +int +xfs_scrub_symlink( + struct xfs_scrub_context *sc) +{ + struct xfs_inode *ip = sc->ip; + struct xfs_ifork *ifp; + loff_t len; + int error = 0; + + if (!S_ISLNK(VFS_I(ip)->i_mode)) + return -ENOENT; + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + len = ip->i_d.di_size; + + /* Plausible size? */ + if (len > XFS_SYMLINK_MAXLEN || len <= 0) { + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + /* Inline symlink? */ + if (ifp->if_flags & XFS_IFINLINE) { + if (len > XFS_IFORK_DSIZE(ip) || + len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip))) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out; + } + + /* Remote symlink; must read the contents. */ + error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf); + if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out; + if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len) + xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); +out: + return error; +} diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c new file mode 100644 index 000000000000..86daed0e3a45 --- /dev/null +++ b/fs/xfs/scrub/trace.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_da_format.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_trans.h" +#include "xfs_bit.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" + +/* Figure out which block the btree cursor was pointing to. */ +static inline xfs_fsblock_t +xfs_scrub_btree_cur_fsbno( + struct xfs_btree_cur *cur, + int level) +{ + if (level < cur->bc_nlevels && cur->bc_bufs[level]) + return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn); + else if (level == cur->bc_nlevels - 1 && + cur->bc_flags & XFS_BTREE_LONG_PTRS) + return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_private.b.ip->i_ino); + else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) + return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0); + return NULLFSBLOCK; +} + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "scrub/trace.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h new file mode 100644 index 000000000000..5d2b1c241be5 --- /dev/null +++ b/fs/xfs/scrub/trace.h @@ -0,0 +1,500 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xfs_scrub + +#if !defined(_TRACE_XFS_SCRUB_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_XFS_SCRUB_TRACE_H + +#include <linux/tracepoint.h> +#include "xfs_bit.h" + +DECLARE_EVENT_CLASS(xfs_scrub_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, + int error), + TP_ARGS(ip, sm, error), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, type) + __field(xfs_agnumber_t, agno) + __field(xfs_ino_t, inum) + __field(unsigned int, gen) + __field(unsigned int, flags) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->type = sm->sm_type; + __entry->agno = sm->sm_agno; + __entry->inum = sm->sm_ino; + __entry->gen = sm->sm_gen; + __entry->flags = sm->sm_flags; + __entry->error = error; + ), + TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->type, + __entry->agno, + __entry->inum, + __entry->gen, + __entry->flags, + __entry->error) +) +#define DEFINE_SCRUB_EVENT(name) \ +DEFINE_EVENT(xfs_scrub_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \ + int error), \ + TP_ARGS(ip, sm, error)) + +DEFINE_SCRUB_EVENT(xfs_scrub_start); +DEFINE_SCRUB_EVENT(xfs_scrub_done); +DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry); + +TRACE_EVENT(xfs_scrub_op_error, + TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno, + xfs_agblock_t bno, int error, void *ret_ip), + TP_ARGS(sc, agno, bno, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->agno = agno; + __entry->bno = bno; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->agno, + __entry->bno, + __entry->error, + __entry->ret_ip) +); + +TRACE_EVENT(xfs_scrub_file_op_error, + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, + xfs_fileoff_t offset, int error, void *ret_ip), + TP_ARGS(sc, whichfork, offset, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(unsigned int, type) + __field(xfs_fileoff_t, offset) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->ip->i_mount->m_super->s_dev; + __entry->ino = sc->ip->i_ino; + __entry->whichfork = whichfork; + __entry->type = sc->sm->sm_type; + __entry->offset = offset; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->whichfork, + __entry->type, + __entry->offset, + __entry->error, + __entry->ret_ip) +); + +DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, + TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip), + TP_ARGS(sc, daddr, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(void *, ret_ip) + ), + TP_fast_assign( + xfs_fsblock_t fsbno; + xfs_agnumber_t agno; + xfs_agblock_t bno; + + fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr); + agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); + bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); + + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->agno = agno; + __entry->bno = bno; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->agno, + __entry->bno, + __entry->ret_ip) +) + +#define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_scrub_block_error_class, name, \ + TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \ + void *ret_ip), \ + TP_ARGS(sc, daddr, ret_ip)) + +DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error); +DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen); + +DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, + TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip), + TP_ARGS(sc, ino, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, type) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = ino; + __entry->type = sc->sm->sm_type; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->type, + __entry->ret_ip) +) + +#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_scrub_ino_error_class, name, \ + TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \ + void *ret_ip), \ + TP_ARGS(sc, ino, ret_ip)) + +DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error); +DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen); +DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning); + +DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, + xfs_fileoff_t offset, void *ret_ip), + TP_ARGS(sc, whichfork, offset, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(unsigned int, type) + __field(xfs_fileoff_t, offset) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->ip->i_mount->m_super->s_dev; + __entry->ino = sc->ip->i_ino; + __entry->whichfork = whichfork; + __entry->type = sc->sm->sm_type; + __entry->offset = offset; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->whichfork, + __entry->type, + __entry->offset, + __entry->ret_ip) +); + +#define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \ + TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \ + xfs_fileoff_t offset, void *ret_ip), \ + TP_ARGS(sc, whichfork, offset, ret_ip)) + +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error); +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning); + +TRACE_EVENT(xfs_scrub_incomplete, + TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip), + TP_ARGS(sc, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->ret_ip) +); + +TRACE_EVENT(xfs_scrub_btree_op_error, + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + int level, int error, void *ret_ip), + TP_ARGS(sc, cur, level, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(xfs_btnum_t, btnum) + __field(int, level) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, ptr); + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); + __entry->ptr = cur->bc_ptrs[level]; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->btnum, + __entry->level, + __entry->ptr, + __entry->agno, + __entry->bno, + __entry->error, + __entry->ret_ip) +); + +TRACE_EVENT(xfs_scrub_ifork_btree_op_error, + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + int level, int error, void *ret_ip), + TP_ARGS(sc, cur, level, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(unsigned int, type) + __field(xfs_btnum_t, btnum) + __field(int, level) + __field(int, ptr) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->ip->i_ino; + __entry->whichfork = cur->bc_private.b.whichfork; + __entry->type = sc->sm->sm_type; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->ptr = cur->bc_ptrs[level]; + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->whichfork, + __entry->type, + __entry->btnum, + __entry->level, + __entry->ptr, + __entry->agno, + __entry->bno, + __entry->error, + __entry->ret_ip) +); + +TRACE_EVENT(xfs_scrub_btree_error, + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + int level, void *ret_ip), + TP_ARGS(sc, cur, level, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(xfs_btnum_t, btnum) + __field(int, level) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, ptr); + __field(void *, ret_ip) + ), + TP_fast_assign( + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); + __entry->ptr = cur->bc_ptrs[level]; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->btnum, + __entry->level, + __entry->ptr, + __entry->agno, + __entry->bno, + __entry->ret_ip) +); + +TRACE_EVENT(xfs_scrub_ifork_btree_error, + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + int level, void *ret_ip), + TP_ARGS(sc, cur, level, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(unsigned int, type) + __field(xfs_btnum_t, btnum) + __field(int, level) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, ptr); + __field(void *, ret_ip) + ), + TP_fast_assign( + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->ip->i_ino; + __entry->whichfork = cur->bc_private.b.whichfork; + __entry->type = sc->sm->sm_type; + __entry->btnum = cur->bc_btnum; + __entry->level = level; + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); + __entry->ptr = cur->bc_ptrs[level]; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->whichfork, + __entry->type, + __entry->btnum, + __entry->level, + __entry->ptr, + __entry->agno, + __entry->bno, + __entry->ret_ip) +); + +DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class, + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, + int level), + TP_ARGS(sc, cur, level), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(xfs_btnum_t, btnum) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, bno) + __field(int, level) + __field(int, nlevels) + __field(int, ptr) + ), + TP_fast_assign( + xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->btnum = cur->bc_btnum; + __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); + __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); + __entry->level = level; + __entry->nlevels = cur->bc_nlevels; + __entry->ptr = cur->bc_ptrs[level]; + ), + TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->btnum, + __entry->agno, + __entry->bno, + __entry->level, + __entry->nlevels, + __entry->ptr) +) +#define DEFINE_SCRUB_SBTREE_EVENT(name) \ +DEFINE_EVENT(xfs_scrub_sbtree_class, name, \ + TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \ + int level), \ + TP_ARGS(sc, cur, level)) + +DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); +DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); + +TRACE_EVENT(xfs_scrub_xref_error, + TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), + TP_ARGS(sc, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u xref error %d ret_ip %pF", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->error, + __entry->ret_ip) +); + +#endif /* _TRACE_XFS_SCRUB_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE scrub/trace +#include <trace/define_trace.h> diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h new file mode 100644 index 000000000000..e00e0eadac6a --- /dev/null +++ b/fs/xfs/scrub/xfs_scrub.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_SCRUB_H__ +#define __XFS_SCRUB_H__ + +#ifndef CONFIG_XFS_ONLINE_SCRUB +# define xfs_scrub_metadata(ip, sm) (-ENOTTY) +#else +int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm); +#endif /* CONFIG_XFS_ONLINE_SCRUB */ + +#endif /* __XFS_SCRUB_H__ */ |