Diffstat (limited to 'fs/xfs/scrub/repair.c')
-rw-r--r--  fs/xfs/scrub/repair.c  1201
1 file changed, 821 insertions(+), 380 deletions(-)
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1c8eecfe52b8..efd5a7ccdf62 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1,7 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Copyright (C) 2018 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
@@ -9,34 +9,49 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
+#include "xfs_rtbitmap.h"
#include "xfs_extent_busy.h"
+#include "xfs_ag.h"
#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_quota.h"
-#include "xfs_attr.h"
+#include "xfs_qm.h"
+#include "xfs_defer.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
+#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_dir2.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtalloc.h"
+#include "xfs_metafile.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_zone_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/stats.h"
+#include "scrub/xfile.h"
+#include "scrub/attr_repair.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -45,20 +60,24 @@
*/
int
xrep_attempt(
- struct xfs_inode *ip,
struct xfs_scrub *sc,
- bool *fixed)
+ struct xchk_stats_run *run)
{
+ u64 repair_start;
int error = 0;
- trace_xrep_attempt(ip, sc->sm, error);
+ trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
xchk_ag_btcur_free(&sc->sa);
+ xchk_rtgroup_btcur_free(&sc->sr);
/* Repair whatever's broken. */
ASSERT(sc->ops->repair);
+ run->repair_attempted = true;
+ repair_start = xchk_stats_now();
error = sc->ops->repair(sc);
- trace_xrep_done(ip, sc->sm, error);
+ trace_xrep_done(XFS_I(file_inode(sc->file)), sc->sm, error);
+ run->repair_ns += xchk_stats_elapsed_ns(repair_start);
switch (error) {
case 0:
/*
@@ -66,22 +85,32 @@ xrep_attempt(
* scrub so that we can tell userspace if we fixed the problem.
*/
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
- *fixed = true;
+ sc->flags |= XREP_ALREADY_FIXED;
+ run->repair_succeeded = true;
+ return -EAGAIN;
+ case -ECHRNG:
+ sc->flags |= XCHK_NEED_DRAIN;
+ run->retries++;
return -EAGAIN;
case -EDEADLOCK:
- case -EAGAIN:
/* Tell the caller to try again having grabbed all the locks. */
- if (!sc->try_harder) {
- sc->try_harder = true;
+ if (!(sc->flags & XCHK_TRY_HARDER)) {
+ sc->flags |= XCHK_TRY_HARDER;
+ run->retries++;
return -EAGAIN;
}
/*
* We tried harder but still couldn't grab all the resources
* we needed to fix it. The corruption has not been fixed,
- * so report back to userspace.
+ * so exit to userspace with the scan's output flags unchanged.
*/
- return -EFSCORRUPTED;
+ return 0;
default:
+ /*
+ * EAGAIN tells the caller to re-scrub, so we cannot return
+ * that here.
+ */
+ ASSERT(error != -EAGAIN);
return error;
}
}
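
For orientation, xrep_attempt() is meant to be driven from a retry loop in the
scrub core. A minimal caller sketch, assuming a simplified dispatch (the
retry_op label and the run stats pointer are illustrative, not the real core):

	retry_op:
		error = sc->ops->scrub(sc);
		if (error)
			return error;
		if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
		    !(sc->flags & XREP_ALREADY_FIXED)) {
			error = xrep_attempt(sc, run);
			/*
			 * -EAGAIN means either "repair worked, rescrub to
			 * verify" or "retry with XCHK_TRY_HARDER set"; the
			 * real core also tears down and reacquires resources
			 * before looping.
			 */
			if (error == -EAGAIN)
				goto retry_op;
		}
		return error;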
@@ -129,43 +158,102 @@ xrep_roll_ag_trans(
{
int error;
- /* Keep the AG header buffers locked so we can keep going. */
- if (sc->sa.agi_bp)
+ /*
+ * Keep the AG header buffers locked while we roll the transaction.
+ * Ensure that both AG buffers are dirty and held when we roll the
+ * transaction so that they move forward in the log without losing the
+ * bli (and hence the bli type) when the transaction commits.
+ *
+ * Normal code would never hold clean buffers across a roll, but repair
+ * needs both buffers to maintain a total lock on the AG.
+ */
+ if (sc->sa.agi_bp) {
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
- if (sc->sa.agf_bp)
+ }
+
+ if (sc->sa.agf_bp) {
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+ }
- /* Roll the transaction. */
+ /*
+ * Roll the transaction. We still hold the AG header buffers locked
+ * regardless of whether or not that succeeds. On failure, the buffers
+ * will be released during teardown on our way out of the kernel. If
+ * successful, join the buffers to the new transaction and move on.
+ */
error = xfs_trans_roll(&sc->tp);
if (error)
- goto out_release;
+ return error;
- /* Join AG headers to the new transaction. */
+ /* Join the AG headers to the new transaction. */
if (sc->sa.agi_bp)
xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
if (sc->sa.agf_bp)
xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
return 0;
+}
+
+/* Roll the scrub transaction, holding the primary metadata locked. */
+int
+xrep_roll_trans(
+ struct xfs_scrub *sc)
+{
+ if (!sc->ip)
+ return xrep_roll_ag_trans(sc);
+ return xfs_trans_roll_inode(&sc->tp, sc->ip);
+}
+
+/* Finish all deferred work attached to the repair transaction. */
+int
+xrep_defer_finish(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ /*
+ * Keep the AG header buffers locked while we complete deferred work
+ * items. Ensure that both AG buffers are dirty and held when we roll
+ * the transaction so that they move forward in the log without losing
+ * the bli (and hence the bli type) when the transaction commits.
+ *
+ * Normal code would never hold clean buffers across a roll, but repair
+ * needs both buffers to maintain a total lock on the AG.
+ */
+ if (sc->sa.agi_bp) {
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+ }
+
+ if (sc->sa.agf_bp) {
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+ }
+
+ /*
+ * Finish all deferred work items. We still hold the AG header buffers
+ * locked regardless of whether or not that succeeds. On failure, the
+ * buffers will be released during teardown on our way out of the
+ * kernel. If successful, join the buffers to the new transaction
+ * and move on.
+ */
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ return error;
-out_release:
/*
- * Rolling failed, so release the hold on the buffers. The
- * buffers will be released during teardown on our way out
- * of the kernel.
+ * Release the hold that we set above because defer_finish won't do
+ * that for us. The defer roll code redirties held buffers after each
+ * roll, so the AG header buffers should be ready for logging.
*/
if (sc->sa.agi_bp)
xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
if (sc->sa.agf_bp)
xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
- if (sc->sa.agfl_bp)
- xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
- return error;
+ return 0;
}
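
A usage sketch (a hypothetical repair step, not part of this patch): a repair
that queues deferred frees settles them with xrep_defer_finish() so that the
AG headers stay held and locked across the intermediate rolls. The
xfs_free_extent_later() call assumes a recent flags-taking signature:

	/* Queue a deferred free for a block the old btree used to own. */
	error = xfs_free_extent_later(sc->tp, fsbno, 1, &XFS_RMAP_OINFO_AG,
			XFS_AG_RESV_NONE, 0);
	if (error)
		return error;

	/* Settle all deferred items; the AG headers remain locked. */
	return xrep_defer_finish(sc);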
/*
@@ -211,12 +299,12 @@ xrep_calc_ag_resblks(
return 0;
pag = xfs_perag_get(mp, sm->sm_agno);
- if (pag->pagi_init) {
+ if (xfs_perag_initialised_agi(pag)) {
/* Use in-core icount if possible. */
icount = pag->pagi_count;
} else {
/* Try to get the actual counters from disk. */
- error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, &bp);
if (!error) {
icount = pag->pagi_count;
xfs_buf_relse(bp);
@@ -224,35 +312,36 @@ xrep_calc_ag_resblks(
}
/* Now grab the block counters from the AGF. */
- error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
- if (!error) {
- aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length);
- freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks);
+ error = xfs_alloc_read_agf(pag, NULL, 0, &bp);
+ if (error) {
+ aglen = pag_group(pag)->xg_block_count;
+ freelen = aglen;
+ usedlen = aglen;
+ } else {
+ struct xfs_agf *agf = bp->b_addr;
+
+ aglen = be32_to_cpu(agf->agf_length);
+ freelen = be32_to_cpu(agf->agf_freeblks);
usedlen = aglen - freelen;
xfs_buf_relse(bp);
}
- xfs_perag_put(pag);
/* If the icount is impossible, make some worst-case assumptions. */
if (icount == NULLAGINO ||
- !xfs_verify_agino(mp, sm->sm_agno, icount)) {
- xfs_agino_t first, last;
-
- xfs_agino_range(mp, sm->sm_agno, &first, &last);
- icount = last - first + 1;
+ !xfs_verify_agino(pag, icount)) {
+ icount = pag->agino_max - pag->agino_min + 1;
}
/* If the block counts are impossible, make worst-case assumptions. */
if (aglen == NULLAGBLOCK ||
- aglen != xfs_ag_block_count(mp, sm->sm_agno) ||
+ aglen != pag_group(pag)->xg_block_count ||
freelen >= aglen) {
- aglen = xfs_ag_block_count(mp, sm->sm_agno);
+ aglen = pag_group(pag)->xg_block_count;
freelen = aglen;
usedlen = aglen;
}
- trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
- freelen, usedlen);
+ trace_xrep_calc_ag_resblks(pag, icount, aglen, freelen, usedlen);
/*
* Figure out how many blocks we'd need worst case to rebuild
@@ -260,19 +349,19 @@ xrep_calc_ag_resblks(
* bnobt/cntbt or inobt/finobt as pairs.
*/
bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
- if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+ if (xfs_has_sparseinodes(mp))
inobt_sz = xfs_iallocbt_calc_size(mp, icount /
XFS_INODES_PER_HOLEMASK_BIT);
else
inobt_sz = xfs_iallocbt_calc_size(mp, icount /
XFS_INODES_PER_CHUNK);
- if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ if (xfs_has_finobt(mp))
inobt_sz *= 2;
- if (xfs_sb_version_hasreflink(&mp->m_sb))
+ if (xfs_has_reflink(mp))
refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
else
refcbt_sz = 0;
- if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ if (xfs_has_rmapbt(mp)) {
/*
* Guess how many blocks we need to rebuild the rmapbt.
* For non-reflink filesystems we can't have more records than
@@ -281,7 +370,7 @@ xrep_calc_ag_resblks(
* many rmaps there could be in the AG, so we start off with
 * what we hope is a generous over-estimation.
*/
- if (xfs_sb_version_hasreflink(&mp->m_sb))
+ if (xfs_has_reflink(mp))
rmapbt_sz = xfs_rmapbt_calc_size(mp,
(unsigned long long)aglen * 2);
else
@@ -290,91 +379,47 @@ xrep_calc_ag_resblks(
rmapbt_sz = 0;
}
- trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
- inobt_sz, rmapbt_sz, refcbt_sz);
+ trace_xrep_calc_ag_resblks_btsize(pag, bnobt_sz, inobt_sz, rmapbt_sz,
+ refcbt_sz);
+ xfs_perag_put(pag);
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
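
As a hypothetical illustration of the final selection: if the estimates came
out to bnobt_sz = 120, inobt_sz = 80, rmapbt_sz = 200, and refcbt_sz = 40
blocks, the reservation would be max(max(120, 80), max(200, 40)) = 200 blocks,
i.e. enough to rebuild the single largest btree.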
-/* Allocate a block in an AG. */
-int
-xrep_alloc_ag_block(
- struct xfs_scrub *sc,
- const struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv)
+#ifdef CONFIG_XFS_RT
+/*
+ * Figure out how many blocks to reserve for a rtgroup repair. We calculate
+ * the worst-case estimate for the number of blocks we'd need to rebuild any
+ * one of the per-rtgroup btrees.
+ */
+xfs_extlen_t
+xrep_calc_rtgroup_resblks(
+ struct xfs_scrub *sc)
{
- struct xfs_alloc_arg args = {0};
- xfs_agblock_t bno;
- int error;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_scrub_metadata *sm = sc->sm;
+ uint64_t usedlen;
+ xfs_extlen_t rmapbt_sz = 0;
- switch (resv) {
- case XFS_AG_RESV_AGFL:
- case XFS_AG_RESV_RMAPBT:
- error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
- if (error)
- return error;
- if (bno == NULLAGBLOCK)
- return -ENOSPC;
- xfs_extent_busy_reuse(sc->mp, sc->sa.agno, bno,
- 1, false);
- *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
- if (resv == XFS_AG_RESV_RMAPBT)
- xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno);
+ if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
return 0;
- default:
- break;
+ if (!xfs_has_rtgroups(mp)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
}
- args.tp = sc->tp;
- args.mp = sc->mp;
- args.oinfo = *oinfo;
- args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
- args.minlen = 1;
- args.maxlen = 1;
- args.prod = 1;
- args.type = XFS_ALLOCTYPE_THIS_AG;
- args.resv = resv;
-
- error = xfs_alloc_vextent(&args);
- if (error)
- return error;
- if (args.fsbno == NULLFSBLOCK)
- return -ENOSPC;
- ASSERT(args.len == 1);
- *fsbno = args.fsbno;
+ usedlen = xfs_rtbxlen_to_blen(mp, xfs_rtgroup_extents(mp, sm->sm_agno));
+ ASSERT(usedlen <= XFS_MAX_RGBLOCKS);
- return 0;
-}
-
-/* Initialize a new AG btree root block with zero entries. */
-int
-xrep_init_btblock(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsb,
- struct xfs_buf **bpp,
- xfs_btnum_t btnum,
- const struct xfs_buf_ops *ops)
-{
- struct xfs_trans *tp = sc->tp;
- struct xfs_mount *mp = sc->mp;
- struct xfs_buf *bp;
-
- trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
- XFS_FSB_TO_AGBNO(mp, fsb), btnum);
+ if (xfs_has_rmapbt(mp))
+ rmapbt_sz = xfs_rtrmapbt_calc_size(mp, usedlen);
- ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
- XFS_FSB_TO_BB(mp, 1), 0);
- xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
- xfs_trans_log_buf(tp, bp, 0, bp->b_length);
- bp->b_ops = ops;
- *bpp = bp;
+ trace_xrep_calc_rtgroup_resblks_btsize(mp, sm->sm_agno, usedlen,
+ rmapbt_sz);
- return 0;
+ return rmapbt_sz;
}
+#endif /* CONFIG_XFS_RT */
/*
* Reconstructing per-AG Btrees
@@ -400,243 +445,23 @@ xrep_init_btblock(
* sublist. As with the other btrees we subtract sublist from bitmap, and the
* result (since the rmapbt lives in the free space) are the blocks from the
* old rmapbt.
- *
- * Disposal of Blocks from Old per-AG Btrees
- *
- * Now that we've constructed a new btree to replace the damaged one, we want
- * to dispose of the blocks that (we think) the old btree was using.
- * Previously, we used the rmapbt to collect the extents (bitmap) with the
- * rmap owner corresponding to the tree we rebuilt, collected extents for any
- * blocks with the same rmap owner that are owned by another data structure
- * (sublist), and subtracted sublist from bitmap. In theory the extents
- * remaining in bitmap are the old btree's blocks.
- *
- * Unfortunately, it's possible that the btree was crosslinked with other
- * blocks on disk. The rmap data can tell us if there are multiple owners, so
- * if the rmapbt says there is an owner of this block other than @oinfo, then
- * the block is crosslinked. Remove the reverse mapping and continue.
- *
- * If there is one rmap record, we can free the block, which removes the
- * reverse mapping but doesn't add the block to the free space. Our repair
- * strategy is to hope the other metadata objects crosslinked on this block
- * will be rebuilt (atop different blocks), thereby removing all the cross
- * links.
- *
- * If there are no rmap records at all, we also free the block. If the btree
- * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
- * supposed to be a rmap record and everything is ok. For other btrees there
- * had to have been an rmap entry for the block to have ended up on @bitmap,
- * so if it's gone now there's something wrong and the fs will shut down.
- *
- * Note: If there are multiple rmap records with only the same rmap owner as
- * the btree we're trying to rebuild and the block is indeed owned by another
- * data structure with the same rmap owner, then the block will be in sublist
- * and therefore doesn't need disposal. If there are multiple rmap records
- * with only the same rmap owner but the block is not owned by something with
- * the same rmap owner, the block will be freed.
- *
- * The caller is responsible for locking the AG headers for the entire rebuild
- * operation so that nothing else can sneak in and change the AG state while
- * we're not looking. We also assume that the caller already invalidated any
- * buffers associated with @bitmap.
*/
-/*
- * Invalidate buffers for per-AG btree blocks we're dumping. This function
- * is not intended for use with file data repairs; we have bunmapi for that.
- */
-int
-xrep_invalidate_blocks(
- struct xfs_scrub *sc,
- struct xfs_bitmap *bitmap)
-{
- struct xfs_bitmap_range *bmr;
- struct xfs_bitmap_range *n;
- struct xfs_buf *bp;
- xfs_fsblock_t fsbno;
-
- /*
- * For each block in each extent, see if there's an incore buffer for
- * exactly that block; if so, invalidate it. The buffer cache only
- * lets us look for one buffer at a time, so we have to look one block
- * at a time. Avoid invalidating AG headers and post-EOFS blocks
- * because we never own those; and if we can't TRYLOCK the buffer we
- * assume it's owned by someone else.
- */
- for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
- /* Skip AG headers and post-EOFS blocks */
- if (!xfs_verify_fsbno(sc->mp, fsbno))
- continue;
- bp = xfs_buf_incore(sc->mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(sc->mp, fsbno),
- XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK);
- if (bp) {
- xfs_trans_bjoin(sc->tp, bp);
- xfs_trans_binval(sc->tp, bp);
- }
- }
-
- return 0;
-}
-
/* Ensure the freelist is the correct size. */
int
xrep_fix_freelist(
struct xfs_scrub *sc,
- bool can_shrink)
+ int alloc_flags)
{
struct xfs_alloc_arg args = {0};
args.mp = sc->mp;
args.tp = sc->tp;
- args.agno = sc->sa.agno;
+ args.agno = pag_agno(sc->sa.pag);
args.alignment = 1;
args.pag = sc->sa.pag;
- return xfs_alloc_fix_freelist(&args,
- can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
-}
-
-/*
- * Put a block back on the AGFL.
- */
-STATIC int
-xrep_put_freelist(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno)
-{
- int error;
-
- /* Make sure there's space on the freelist. */
- error = xrep_fix_freelist(sc, true);
- if (error)
- return error;
-
- /*
- * Since we're "freeing" a lost block onto the AGFL, we have to
- * create an rmap for the block prior to merging it or else other
- * parts will break.
- */
- error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
- &XFS_RMAP_OINFO_AG);
- if (error)
- return error;
-
- /* Put the block on the AGFL. */
- error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp,
- agbno, 0);
- if (error)
- return error;
- xfs_extent_busy_insert(sc->tp, sc->sa.agno, agbno, 1,
- XFS_EXTENT_BUSY_SKIP_DISCARD);
-
- return 0;
-}
-
-/* Dispose of a single block. */
-STATIC int
-xrep_reap_block(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsbno,
- const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type resv)
-{
- struct xfs_btree_cur *cur;
- struct xfs_buf *agf_bp = NULL;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- int error;
-
- agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
- agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
-
- /*
- * If we are repairing per-inode metadata, we need to read in the AGF
- * buffer. Otherwise, we're repairing a per-AG structure, so reuse
- * the AGF buffer that the setup functions already grabbed.
- */
- if (sc->ip) {
- error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
- if (error)
- return error;
- if (!agf_bp)
- return -ENOMEM;
- } else {
- agf_bp = sc->sa.agf_bp;
- }
- cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, agno);
-
- /* Can we find any other rmappings? */
- error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo, &has_other_rmap);
- xfs_btree_del_cursor(cur, error);
- if (error)
- goto out_free;
-
- /*
- * If there are other rmappings, this block is cross linked and must
- * not be freed. Remove the reverse mapping and move on. Otherwise,
- * we were the only owner of the block, so free the extent, which will
- * also remove the rmap.
- *
- * XXX: XFS doesn't support detecting the case where a single block
- * metadata structure is crosslinked with a multi-block structure
- * because the buffer cache doesn't detect aliasing problems, so we
- * can't fix 100% of crosslinking problems (yet). The verifiers will
- * blow on writeout, the filesystem will shut down, and the admin gets
- * to run xfs_repair.
- */
- if (has_other_rmap)
- error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo);
- else if (resv == XFS_AG_RESV_AGFL)
- error = xrep_put_freelist(sc, agbno);
- else
- error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- if (error)
- return error;
-
- if (sc->ip)
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
- return xrep_roll_ag_trans(sc);
-
-out_free:
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- return error;
-}
-
-/* Dispose of every block of every extent in the bitmap. */
-int
-xrep_reap_extents(
- struct xfs_scrub *sc,
- struct xfs_bitmap *bitmap,
- const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
-{
- struct xfs_bitmap_range *bmr;
- struct xfs_bitmap_range *n;
- xfs_fsblock_t fsbno;
- int error = 0;
-
- ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
-
- for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
- ASSERT(sc->ip != NULL ||
- XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno);
- trace_xrep_dispose_btree_extent(sc->mp,
- XFS_FSB_TO_AGNO(sc->mp, fsbno),
- XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
-
- error = xrep_reap_block(sc, fsbno, oinfo, type);
- if (error)
- goto out;
- }
-
-out:
- xfs_bitmap_destroy(bitmap);
- return error;
+ return xfs_alloc_fix_freelist(&args, alloc_flags);
}
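
A hypothetical caller that wants the AGFL topped up but never shrunk would
pass XFS_ALLOC_FLAG_NOSHRINK:

	/* Refill the AGFL ahead of a btree split without shrinking it. */
	error = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NOSHRINK);
	if (error)
		return error;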
/*
@@ -682,7 +507,7 @@ xrep_findroot_agfl_walk(
{
xfs_agblock_t *agbno = priv;
- return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+ return (*agbno == bno) ? -ECANCELED : 0;
}
/* Does this block match the btree information passed in? */
@@ -701,7 +526,7 @@ xrep_findroot_block(
int block_level;
int error = 0;
- daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
+ daddr = xfs_agbno_to_daddr(ri->sc->sa.pag, agbno);
/*
* Blocks in the AGFL have stale contents that might just happen to
@@ -712,7 +537,7 @@ xrep_findroot_block(
if (owner == XFS_RMAP_OWN_AG) {
error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
xrep_findroot_agfl_walk, &agbno);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
return 0;
if (error)
return error;
@@ -743,7 +568,8 @@ xrep_findroot_block(
/* Ensure the block magic matches the btree type we're looking for. */
btblock = XFS_BUF_TO_BLOCK(bp);
- if (be32_to_cpu(btblock->bb_magic) != fab->magic)
+ ASSERT(fab->buf_ops->magic[1] != 0);
+ if (btblock->bb_magic != fab->buf_ops->magic[1])
goto out;
/*
@@ -768,18 +594,23 @@ xrep_findroot_block(
if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
&mp->m_sb.sb_meta_uuid))
goto out;
+ /*
+ * Read verifiers can reference b_ops, so we set the pointer
+ * here. If the verifier fails we'll reset the buffer state
+ * to what it was before we touched the buffer.
+ */
+ bp->b_ops = fab->buf_ops;
fab->buf_ops->verify_read(bp);
if (bp->b_error) {
+ bp->b_ops = NULL;
bp->b_error = 0;
goto out;
}
/*
* Some read verifiers will (re)set b_ops, so we must be
- * careful not to blow away any such assignment.
+ * careful not to change b_ops after running the verifier.
*/
- if (!bp->b_ops)
- bp->b_ops = fab->buf_ops;
}
/*
@@ -824,7 +655,7 @@ xrep_findroot_block(
else
fab->root = NULLAGBLOCK;
- trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
+ trace_xrep_findroot_block(ri->sc->sa.pag, agbno,
be32_to_cpu(btblock->bb_magic), fab->height - 1);
out:
xfs_trans_brelse(ri->sc->tp, bp);
@@ -838,7 +669,7 @@ out:
STATIC int
xrep_findroot_rmap(
struct xfs_btree_cur *cur,
- struct xfs_rmap_irec *rec,
+ const struct xfs_rmap_irec *rec,
void *priv)
{
struct xrep_findroot *ri = priv;
@@ -889,7 +720,7 @@ xrep_find_ag_btree_roots(
ri.sc = sc;
ri.btree_info = btree_info;
- ri.agf = XFS_BUF_TO_AGF(agf_bp);
+ ri.agf = agf_bp->b_addr;
ri.agfl_bp = agfl_bp;
for (fab = btree_info; fab->buf_ops; fab++) {
ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG);
@@ -898,30 +729,65 @@ xrep_find_ag_btree_roots(
fab->height = 0;
}
- cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri);
xfs_btree_del_cursor(cur, error);
return error;
}
+#ifdef CONFIG_XFS_QUOTA
+/* Update some quota flags in the superblock. */
+void
+xrep_update_qflags(
+ struct xfs_scrub *sc,
+ unsigned int clear_flags,
+ unsigned int set_flags)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+
+ mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+ if ((mp->m_qflags & clear_flags) == 0 &&
+ (mp->m_qflags & set_flags) == set_flags)
+ goto no_update;
+
+ mp->m_qflags &= ~clear_flags;
+ mp->m_qflags |= set_flags;
+
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags &= ~clear_flags;
+ mp->m_sb.sb_qflags |= set_flags;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * Update the quota flags in the ondisk superblock without touching
+ * the summary counters. We have not quiesced inode chunk allocation,
+ * so we cannot coordinate with updates to the icount and ifree percpu
+ * counters.
+ */
+ bp = xfs_trans_getsb(sc->tp);
+ xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
+ xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1);
+
+no_update:
+ mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+}
+
/* Force a quotacheck the next time we mount. */
void
xrep_force_quotacheck(
struct xfs_scrub *sc,
- uint dqtype)
+ xfs_dqtype_t type)
{
uint flag;
- flag = xfs_quota_chkd_flag(dqtype);
+ flag = xfs_quota_chkd_flag(type);
if (!(flag & sc->mp->m_qflags))
return;
- sc->mp->m_qflags &= ~flag;
- spin_lock(&sc->mp->m_sb_lock);
- sc->mp->m_sb.sb_qflags &= ~flag;
- spin_unlock(&sc->mp->m_sb_lock);
- xfs_log_sb(sc->tp);
+ xrep_update_qflags(sc, flag, 0);
}
/*
@@ -929,10 +795,10 @@ xrep_force_quotacheck(
*
* This function ensures that the appropriate dquots are attached to an inode.
* We cannot allow the dquot code to allocate an on-disk dquot block here
- * because we're already in transaction context with the inode locked. The
- * on-disk dquot should already exist anyway. If the quota code signals
- * corruption or missing quota information, schedule quotacheck, which will
- * repair corruptions in the quota metadata.
+ * because we're already in transaction context. The on-disk dquot should
+ * already exist anyway. If the quota code signals corruption or missing quota
+ * information, schedule quotacheck, which will repair corruptions in the quota
+ * metadata.
*/
int
xrep_ino_dqattach(
@@ -940,7 +806,10 @@ xrep_ino_dqattach(
{
int error;
- error = xfs_qm_dqattach_locked(sc->ip, false);
+ ASSERT(sc->tp != NULL);
+ ASSERT(sc->ip != NULL);
+
+ error = xfs_qm_dqattach(sc->ip);
switch (error) {
case -EFSBADCRC:
case -EFSCORRUPTED:
@@ -949,12 +818,12 @@ xrep_ino_dqattach(
"inode %llu repair encountered quota error %d, quotacheck forced.",
(unsigned long long)sc->ip->i_ino, error);
if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
- xrep_force_quotacheck(sc, XFS_DQ_USER);
+ xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
- xrep_force_quotacheck(sc, XFS_DQ_GROUP);
+ xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
- xrep_force_quotacheck(sc, XFS_DQ_PROJ);
- /* fall through */
+ xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
+ fallthrough;
case -ESRCH:
error = 0;
break;
@@ -964,3 +833,575 @@ xrep_ino_dqattach(
return error;
}
+#endif /* CONFIG_XFS_QUOTA */
+
+/*
+ * Ensure that the inode being repaired is ready to handle a certain number of
+ * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode
+ * being repaired and have joined it to the scrub transaction.
+ */
+int
+xrep_ino_ensure_extent_count(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xfs_extnum_t nextents)
+{
+ xfs_extnum_t max_extents;
+ bool inode_has_nrext64;
+
+ inode_has_nrext64 = xfs_inode_has_large_extent_counts(sc->ip);
+ max_extents = xfs_iext_max_nextents(inode_has_nrext64, whichfork);
+ if (nextents <= max_extents)
+ return 0;
+ if (inode_has_nrext64)
+ return -EFSCORRUPTED;
+ if (!xfs_has_large_extent_counts(sc->mp))
+ return -EFSCORRUPTED;
+
+ max_extents = xfs_iext_max_nextents(true, whichfork);
+ if (nextents > max_extents)
+ return -EFSCORRUPTED;
+
+ sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ return 0;
+}
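
A hypothetical call site, before rebuilding a data fork that will contain
new_nextents mappings (the variable name is illustrative):

	/* Upgrade to large extent counters if the new mapping count needs it. */
	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, new_nextents);
	if (error)
		return error;	/* fork can never hold that many extents */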
+
+/*
+ * Initialize all the btree cursors for an AG repair except for the btree that
+ * we're rebuilding.
+ */
+void
+xrep_ag_btcur_init(
+ struct xfs_scrub *sc,
+ struct xchk_ag *sa)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ /* Set up a bnobt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT &&
+ sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) {
+ sa->bno_cur = xfs_bnobt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+ sa->cnt_cur = xfs_cntbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+ }
+
+ /* Set up an inobt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT &&
+ sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) {
+ sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
+ sa->agi_bp);
+ if (xfs_has_finobt(mp))
+ sa->fino_cur = xfs_finobt_init_cursor(sc->sa.pag,
+ sc->tp, sa->agi_bp);
+ }
+
+ /* Set up a rmapbt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT &&
+ xfs_has_rmapbt(mp))
+ sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+
+ /* Set up a refcountbt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT &&
+ xfs_has_reflink(mp))
+ sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
+ sa->agf_bp, sc->sa.pag);
+}
+
+/*
+ * Reinitialize the in-core AG state after a repair by rereading the AGF
+ * buffer. We had better get the same AGF buffer as the one that's attached
+ * to the scrub context.
+ */
+int
+xrep_reinit_pagf(
+ struct xfs_scrub *sc)
+{
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_buf *bp;
+ int error;
+
+ ASSERT(pag);
+ ASSERT(xfs_perag_initialised_agf(pag));
+
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
+ error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
+ if (error)
+ return error;
+
+ if (bp != sc->sa.agf_bp) {
+ ASSERT(bp == sc->sa.agf_bp);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/*
+ * Reinitialize the in-core AG state after a repair by rereading the AGI
+ * buffer. We had better get the same AGI buffer as the one that's attached
+ * to the scrub context.
+ */
+int
+xrep_reinit_pagi(
+ struct xfs_scrub *sc)
+{
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_buf *bp;
+ int error;
+
+ ASSERT(pag);
+ ASSERT(xfs_perag_initialised_agi(pag));
+
+ clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
+ error = xfs_ialloc_read_agi(pag, sc->tp, 0, &bp);
+ if (error)
+ return error;
+
+ if (bp != sc->sa.agi_bp) {
+ ASSERT(bp == sc->sa.agi_bp);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/*
+ * Given an active reference to a perag structure, load AG headers and cursors.
+ * This should only be called to scan an AG while repairing file-based metadata.
+ */
+int
+xrep_ag_init(
+ struct xfs_scrub *sc,
+ struct xfs_perag *pag,
+ struct xchk_ag *sa)
+{
+ int error;
+
+ ASSERT(!sa->pag);
+
+ error = xfs_ialloc_read_agi(pag, sc->tp, 0, &sa->agi_bp);
+ if (error)
+ return error;
+
+ error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
+ if (error)
+ return error;
+
+ /* Grab our own passive reference from the caller's ref. */
+ sa->pag = xfs_perag_hold(pag);
+ xrep_ag_btcur_init(sc, sa);
+ return 0;
+}
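
A sketch of how a file-based repair might walk every AG with this helper; the
xfs_perag_next() iteration and xchk_ag_free() teardown are assumptions based
on the surrounding scrub code:

	struct xfs_perag	*pag = NULL;

	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_ag_init(sc, pag, &sc->sa);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
		/* ... cross-reference records in this AG ... */
		xchk_ag_free(sc, &sc->sa);
	}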
+
+#ifdef CONFIG_XFS_RT
+/* Initialize all the btree cursors for a RT repair. */
+void
+xrep_rtgroup_btcur_init(
+ struct xfs_scrub *sc,
+ struct xchk_rt *sr)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ ASSERT(sr->rtg != NULL);
+
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RTRMAPBT &&
+ (sr->rtlock_flags & XFS_RTGLOCK_RMAP) &&
+ xfs_has_rtrmapbt(mp))
+ sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->tp, sr->rtg);
+
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RTREFCBT &&
+ (sr->rtlock_flags & XFS_RTGLOCK_REFCOUNT) &&
+ xfs_has_rtreflink(mp))
+ sr->refc_cur = xfs_rtrefcountbt_init_cursor(sc->tp, sr->rtg);
+}
+
+/*
+ * Given a reference to a rtgroup structure, lock rtgroup btree inodes and
+ * create btree cursors. Must only be called to repair a regular rt file.
+ */
+int
+xrep_rtgroup_init(
+ struct xfs_scrub *sc,
+ struct xfs_rtgroup *rtg,
+ struct xchk_rt *sr,
+ unsigned int rtglock_flags)
+{
+ ASSERT(sr->rtg == NULL);
+
+ xfs_rtgroup_lock(rtg, rtglock_flags);
+ sr->rtlock_flags = rtglock_flags;
+
+ /* Grab our own passive reference from the caller's ref. */
+ sr->rtg = xfs_rtgroup_hold(rtg);
+ xrep_rtgroup_btcur_init(sc, sr);
+ return 0;
+}
+
+/* Ensure that all rt blocks in the given range are not marked free. */
+int
+xrep_require_rtext_inuse(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t rgbno,
+ xfs_filblks_t len)
+{
+ struct xfs_mount *mp = sc->mp;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t endrtx;
+ bool is_free = false;
+ int error = 0;
+
+ if (xfs_has_zoned(mp)) {
+ if (!xfs_zone_rgbno_is_valid(sc->sr.rtg, rgbno + len - 1))
+ return -EFSCORRUPTED;
+ return 0;
+ }
+
+ startrtx = xfs_rgbno_to_rtx(mp, rgbno);
+ endrtx = xfs_rgbno_to_rtx(mp, rgbno + len - 1);
+
+ error = xfs_rtalloc_extent_is_free(sc->sr.rtg, sc->tp, startrtx,
+ endrtx - startrtx + 1, &is_free);
+ if (error)
+ return error;
+ if (is_free)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+#endif /* CONFIG_XFS_RT */
+
+/* Reinitialize the per-AG block reservation for the AG we just fixed. */
+int
+xrep_reset_perag_resv(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!(sc->flags & XREP_RESET_PERAG_RESV))
+ return 0;
+
+ ASSERT(sc->sa.pag != NULL);
+ ASSERT(sc->ops->type == ST_PERAG);
+ ASSERT(sc->tp);
+
+ sc->flags &= ~XREP_RESET_PERAG_RESV;
+ xfs_ag_resv_free(sc->sa.pag);
+ error = xfs_ag_resv_init(sc->sa.pag, sc->tp);
+ if (error == -ENOSPC) {
+ xfs_err(sc->mp,
+"Insufficient free space to reset per-AG reservation for AG %u after repair.",
+ pag_agno(sc->sa.pag));
+ error = 0;
+ }
+
+ return error;
+}
+
+/* Decide if we are going to call the repair function for a scrub type. */
+bool
+xrep_will_attempt(
+ struct xfs_scrub *sc)
+{
+ /* Userspace asked us to rebuild the structure regardless. */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
+ return true;
+
+ /* Let debug users force us into the repair routines. */
+ if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+ return true;
+
+ /* Metadata is corrupt or failed cross-referencing. */
+ if (xchk_needs_repair(sc->sm))
+ return true;
+
+ return false;
+}
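
A hypothetical call site in the dispatch path:

	/* Only enter the repair path if the predicate says so. */
	if (xrep_will_attempt(sc))
		error = xrep_attempt(sc, run);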
+
+/* Try to fix some part of a metadata inode by calling another scrubber. */
+STATIC int
+xrep_metadata_inode_subtype(
+ struct xfs_scrub *sc,
+ unsigned int scrub_type)
+{
+ struct xfs_scrub_subord *sub;
+ int error;
+
+ /*
+ * Let's see if the inode needs repair. Use a subordinate scrub context
+ * to call the scrub and repair functions so that we can hang on to the
+ * resources that we already acquired instead of using the standard
+ * setup/teardown routines.
+ */
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
+ if (error)
+ goto out;
+ if (!xrep_will_attempt(&sub->sc))
+ goto out;
+
+ /*
+ * Repair some part of the inode. This will potentially join the inode
+ * to the transaction.
+ */
+ error = sub->sc.ops->repair(&sub->sc);
+ if (error)
+ goto out;
+
+ /*
+ * Finish all deferred intent items and then roll the transaction so
+ * that the inode will not be joined to the transaction when we exit
+ * the function.
+ */
+ error = xfs_defer_finish(&sub->sc.tp);
+ if (error)
+ goto out;
+ error = xfs_trans_roll(&sub->sc.tp);
+ if (error)
+ goto out;
+
+ /*
+ * Clear the corruption flags and re-check the metadata that we just
+ * repaired.
+ */
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = sub->sc.ops->scrub(&sub->sc);
+ if (error)
+ goto out;
+
+ /* If corruption persists, the repair has failed. */
+ if (xchk_needs_repair(sub->sc.sm)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+out:
+ xchk_scrub_free_subord(sub);
+ return error;
+}
+
+/*
+ * Repair the ondisk forks of a metadata inode. The caller must ensure that
+ * sc->ip points to the metadata inode and the ILOCK is held on that inode.
+ * The inode must not be joined to the transaction before the call, and will
+ * not be afterwards.
+ */
+int
+xrep_metadata_inode_forks(
+ struct xfs_scrub *sc)
+{
+ bool dirty = false;
+ int error;
+
+ /* Repair the inode record and the data fork. */
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
+ if (error)
+ return error;
+
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
+ if (error)
+ return error;
+
+ /*
+ * Metadata files can only have extended attributes on metadir
+ * filesystems, either for parent pointers or for actual xattr data.
+ * For a non-metadir filesystem, make sure the attr fork looks ok
+ * before we delete it.
+ */
+ if (xfs_inode_hasattr(sc->ip)) {
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
+ if (error)
+ return error;
+ }
+
+ /* Clear the reflink flag since metadata never shares. */
+ if (xfs_is_reflink_inode(sc->ip)) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Metadata files on non-metadir filesystems cannot have attr forks,
+ * so clear them now.
+ */
+ if (xfs_inode_hasattr(sc->ip) && !xfs_has_metadir(sc->mp)) {
+ if (!dirty) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ }
+ error = xrep_xattr_reset_fork(sc);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If we modified the inode, roll the transaction but don't rejoin the
+ * inode to the new transaction because xrep_bmap_data can do that.
+ */
+ if (dirty) {
+ error = xfs_trans_roll(&sc->tp);
+ if (error)
+ return error;
+ dirty = false;
+ }
+
+ return 0;
+}
+
+/*
+ * Set up an in-memory buffer cache so that we can use the xfbtree. Allocating
+ * a shmem file might take locks, so we cannot be in transaction context. Park
+ * our resources in the scrub context and let the teardown function take care
+ * of them at the right time.
+ */
+int
+xrep_setup_xfbtree(
+ struct xfs_scrub *sc,
+ const char *descr)
+{
+ ASSERT(sc->tp == NULL);
+
+ return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
+}
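
A hypothetical setup sequence; the "rmap records" description string is
illustrative:

	/* Create the in-memory buffer target before any transaction exists. */
	error = xrep_setup_xfbtree(sc, "rmap records");
	if (error)
		return error;
	/* sc->xmbtp can now back an xfbtree; scrub teardown will free it. */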
+
+/*
+ * See if this buffer can pass the given ->verify_struct() function.
+ *
+ * If the buffer already has ops attached and they're not the ones that were
+ * passed in, we reject the buffer. Otherwise, we perform the structure test
+ * (note that we do not check CRCs) and return the outcome of the test. The
+ * buffer ops and error state are left unchanged.
+ */
+bool
+xrep_buf_verify_struct(
+ struct xfs_buf *bp,
+ const struct xfs_buf_ops *ops)
+{
+ const struct xfs_buf_ops *old_ops = bp->b_ops;
+ xfs_failaddr_t fa;
+ int old_error;
+
+ if (old_ops) {
+ if (old_ops != ops)
+ return false;
+ }
+
+ old_error = bp->b_error;
+ bp->b_ops = ops;
+ fa = bp->b_ops->verify_struct(bp);
+ bp->b_ops = old_ops;
+ bp->b_error = old_error;
+
+ return fa == NULL;
+}
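
A hypothetical probe that asks whether a candidate buffer parses as a bnobt
block without disturbing its state:

	if (xrep_buf_verify_struct(bp, &xfs_bnobt_buf_ops)) {
		/* Structure checks out; treat the block as a candidate. */
	}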
+
+/* Check the sanity of a rmap record for a metadata btree inode. */
+int
+xrep_check_ino_btree_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ /*
+ * Metadata btree inodes never have extended attributes, and all blocks
+ * should have the bmbt block flag set.
+ */
+ if ((rec->rm_flags & XFS_RMAP_ATTR_FORK) ||
+ !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ return -EFSCORRUPTED;
+
+ /* Make sure the block is within the AG. */
+ if (!xfs_verify_agbext(sc->sa.pag, rec->rm_startblock,
+ rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
+ rec->rm_blockcount, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
+/*
+ * Reset the block count of the inode being repaired, and adjust the dquot
+ * block usage to match. The inode must not have an xattr fork.
+ */
+void
+xrep_inode_set_nblocks(
+ struct xfs_scrub *sc,
+ int64_t new_blocks)
+{
+ int64_t delta =
+ new_blocks - sc->ip->i_nblocks;
+
+ sc->ip->i_nblocks = new_blocks;
+
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ if (delta != 0)
+ xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT,
+ delta);
+}
+
+/* Reset the block reservation for a metadata inode. */
+int
+xrep_reset_metafile_resv(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ int64_t delta;
+ int error;
+
+ delta = mp->m_metafile_resv_used + mp->m_metafile_resv_avail -
+ mp->m_metafile_resv_target;
+ if (delta == 0)
+ return 0;
+
+ /*
+ * Too many blocks have been reserved, transfer some from the incore
+ * reservation back to the filesystem.
+ */
+ if (delta > 0) {
+ int64_t give_back;
+
+ give_back = min_t(uint64_t, delta, mp->m_metafile_resv_avail);
+ if (give_back > 0) {
+ xfs_mod_sb_delalloc(mp, -give_back);
+ xfs_add_fdblocks(mp, give_back);
+ mp->m_metafile_resv_avail -= give_back;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Not enough reservation; try to take some blocks from the filesystem
+ * to the metabtree reservation.
+ */
+ delta = -delta; /* delta is negative here, so invert the sign. */
+ error = xfs_dec_fdblocks(mp, delta, true);
+ while (error == -ENOSPC) {
+ delta--;
+ if (delta == 0) {
+ xfs_warn(sc->mp,
+"Insufficient free space to reset metabtree reservation after repair.");
+ return 0;
+ }
+ error = xfs_dec_fdblocks(mp, delta, true);
+ }
+ if (error)
+ return error;
+
+ xfs_mod_sb_delalloc(mp, delta);
+ mp->m_metafile_resv_avail += delta;
+ return 0;
+}
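
As a hypothetical walk-through of the sign convention above: with used = 40,
avail = 40, and target = 100 blocks, delta = 40 + 40 - 100 = -20, so the
function tries to move 20 blocks from fdblocks into the reservation, backing
off one block at a time on -ENOSPC. Conversely, used = 80, avail = 60, and
target = 100 gives delta = +40, and 40 blocks (capped at the available count)
are handed back to fdblocks.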