diff options
Diffstat (limited to 'fs/xfs/scrub/scrub.h')
| -rw-r--r-- | fs/xfs/scrub/scrub.h | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h new file mode 100644 index 000000000000..a3f1abc91390 --- /dev/null +++ b/fs/xfs/scrub/scrub.h @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_SCRUB_H__ +#define __XFS_SCRUB_SCRUB_H__ + +struct xfs_scrub; + +struct xchk_relax { + unsigned long next_resched; + unsigned int resched_nr; + bool interruptible; +}; + +/* Yield to the scheduler at most 10x per second. */ +#define XCHK_RELAX_NEXT (jiffies + (HZ / 10)) + +#define INIT_XCHK_RELAX \ + (struct xchk_relax){ \ + .next_resched = XCHK_RELAX_NEXT, \ + .resched_nr = 0, \ + .interruptible = true, \ + } + +/* + * Relax during a scrub operation and exit if there's a fatal signal pending. + * + * If preemption is disabled, we need to yield to the scheduler every now and + * then so that we don't run afoul of the soft lockup watchdog or RCU stall + * detector. cond_resched calls are somewhat expensive (~5ns) so we want to + * ratelimit this to 10x per second. Amortize the cost of the other checks by + * only doing it once every 100 calls. + */ +static inline int xchk_maybe_relax(struct xchk_relax *widget) +{ + /* Amortize the cost of scheduling and checking signals. */ + if (likely(++widget->resched_nr < 100)) + return 0; + widget->resched_nr = 0; + + if (unlikely(widget->next_resched <= jiffies)) { + cond_resched(); + widget->next_resched = XCHK_RELAX_NEXT; + } + + if (widget->interruptible && fatal_signal_pending(current)) + return -EINTR; + + return 0; +} + +/* + * Standard flags for allocating memory within scrub. NOFS context is + * configured by the process allocation scope. Scrub and repair must be able + * to back out gracefully if there isn't enough memory. Force-cast to avoid + * complaints from static checkers. + */ +#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \ + __GFP_RETRY_MAYFAIL)) + +/* + * For opening files by handle for fsck operations, we don't trust the inumber + * or the allocation state; therefore, perform an untrusted lookup. We don't + * want these inodes to pollute the cache, so mark them for immediate removal. + */ +#define XCHK_IGET_FLAGS (XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE) + +/* Type info and names for the scrub types. */ +enum xchk_type { + ST_NONE = 1, /* disabled */ + ST_PERAG, /* per-AG metadata */ + ST_FS, /* per-FS metadata */ + ST_INODE, /* per-inode metadata */ + ST_GENERIC, /* determined by the scrubber */ + ST_RTGROUP, /* rtgroup metadata */ +}; + +struct xchk_meta_ops { + /* Acquire whatever resources are needed for the operation. */ + int (*setup)(struct xfs_scrub *sc); + + /* Examine metadata for errors. */ + int (*scrub)(struct xfs_scrub *); + + /* Repair or optimize the metadata. */ + int (*repair)(struct xfs_scrub *); + + /* + * Re-scrub the metadata we repaired, in case there's extra work that + * we need to do to check our repair work. If this is NULL, we'll use + * the ->scrub function pointer, assuming that the regular scrub is + * sufficient. + */ + int (*repair_eval)(struct xfs_scrub *sc); + + /* Decide if we even have this piece of metadata. */ + bool (*has)(const struct xfs_mount *); + + /* type describing required/allowed inputs */ + enum xchk_type type; +}; + +/* Buffer pointers and btree cursors for an entire AG. */ +struct xchk_ag { + struct xfs_perag *pag; + + /* AG btree roots */ + struct xfs_buf *agf_bp; + struct xfs_buf *agi_bp; + + /* AG btrees */ + struct xfs_btree_cur *bno_cur; + struct xfs_btree_cur *cnt_cur; + struct xfs_btree_cur *ino_cur; + struct xfs_btree_cur *fino_cur; + struct xfs_btree_cur *rmap_cur; + struct xfs_btree_cur *refc_cur; +}; + +/* Inode lock state for the RT volume. */ +struct xchk_rt { + /* incore rtgroup, if applicable */ + struct xfs_rtgroup *rtg; + + /* XFS_RTGLOCK_* lock state if locked */ + unsigned int rtlock_flags; + + /* rtgroup btrees */ + struct xfs_btree_cur *rmap_cur; + struct xfs_btree_cur *refc_cur; +}; + +struct xfs_scrub { + /* General scrub state. */ + struct xfs_mount *mp; + struct xfs_scrub_metadata *sm; + const struct xchk_meta_ops *ops; + struct xfs_trans *tp; + + /* File that scrub was called with. */ + struct file *file; + + /* + * File that is undergoing the scrub operation. This can differ from + * the file that scrub was called with if we're checking file-based fs + * metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for + * something that can't be opened directly (e.g. symlinks). + */ + struct xfs_inode *ip; + + /* Kernel memory buffer used by scrubbers; freed at teardown. */ + void *buf; + + /* + * Clean up resources owned by whatever is in the buffer. Cleanup can + * be deferred with this hook as a means for scrub functions to pass + * data to repair functions. This function must not free the buffer + * itself. + */ + void (*buf_cleanup)(void *buf); + + /* xfile used by the scrubbers; freed at teardown. */ + struct xfile *xfile; + + /* buffer target for in-memory btrees; also freed at teardown. */ + struct xfs_buftarg *xmbtp; + + /* Lock flags for @ip. */ + uint ilock_flags; + + /* The orphanage, for stashing files that have lost their parent. */ + uint orphanage_ilock_flags; + struct xfs_inode *orphanage; + + /* A temporary file on this filesystem, for staging new metadata. */ + struct xfs_inode *tempip; + uint temp_ilock_flags; + + /* See the XCHK/XREP state flags below. */ + unsigned int flags; + + /* + * The XFS_SICK_* flags that correspond to the metadata being scrubbed + * or repaired. We will use this mask to update the in-core fs health + * status with whatever we find. + */ + unsigned int sick_mask; + + /* + * Clear these XFS_SICK_* flags but only if the scan is ok. Useful for + * removing ZAPPED flags after a repair. + */ + unsigned int healthy_mask; + + /* next time we want to cond_resched() */ + struct xchk_relax relax; + + /* State tracking for single-AG operations. */ + struct xchk_ag sa; + + /* State tracking for realtime operations. */ + struct xchk_rt sr; +}; + +/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ +#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */ +#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */ +#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */ +#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */ +#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */ +#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */ +#define XCHK_FSGATES_RMAP (1U << 6) /* rmapbt live update enabled */ +#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */ +#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ + +/* + * The XCHK_FSGATES* flags reflect functionality in the main filesystem that + * are only enabled for this particular online fsck. When not in use, the + * features are gated off via dynamic code patching, which is why the state + * must be enabled during scrub setup and can only be torn down afterwards. + */ +#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \ + XCHK_FSGATES_QUOTA | \ + XCHK_FSGATES_DIRENTS | \ + XCHK_FSGATES_RMAP) + +struct xfs_scrub_subord { + struct xfs_scrub sc; + struct xfs_scrub *parent_sc; + unsigned int old_smtype; + unsigned int old_smflags; +}; + +struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc, + unsigned int subtype); +void xchk_scrub_free_subord(struct xfs_scrub_subord *sub); + +/* + * We /could/ terminate a scrub/repair operation early. If we're not + * in a good place to continue (fatal signal, etc.) then bail out. + * Note that we're careful not to make any judgements about *error. + */ +static inline bool +xchk_should_terminate( + struct xfs_scrub *sc, + int *error) +{ + if (xchk_maybe_relax(&sc->relax)) { + if (*error == 0) + *error = -EINTR; + return true; + } + return false; +} + +static inline int xchk_nothing(struct xfs_scrub *sc) +{ + return -ENOENT; +} + +/* Metadata scrubbers */ +int xchk_tester(struct xfs_scrub *sc); +int xchk_superblock(struct xfs_scrub *sc); +int xchk_agf(struct xfs_scrub *sc); +int xchk_agfl(struct xfs_scrub *sc); +int xchk_agi(struct xfs_scrub *sc); +int xchk_allocbt(struct xfs_scrub *sc); +int xchk_iallocbt(struct xfs_scrub *sc); +int xchk_rmapbt(struct xfs_scrub *sc); +int xchk_refcountbt(struct xfs_scrub *sc); +int xchk_inode(struct xfs_scrub *sc); +int xchk_bmap_data(struct xfs_scrub *sc); +int xchk_bmap_attr(struct xfs_scrub *sc); +int xchk_bmap_cow(struct xfs_scrub *sc); +int xchk_directory(struct xfs_scrub *sc); +int xchk_xattr(struct xfs_scrub *sc); +int xchk_symlink(struct xfs_scrub *sc); +int xchk_parent(struct xfs_scrub *sc); +int xchk_dirtree(struct xfs_scrub *sc); +int xchk_metapath(struct xfs_scrub *sc); +#ifdef CONFIG_XFS_RT +int xchk_rtbitmap(struct xfs_scrub *sc); +int xchk_rtsummary(struct xfs_scrub *sc); +int xchk_rgsuperblock(struct xfs_scrub *sc); +int xchk_rtrmapbt(struct xfs_scrub *sc); +int xchk_rtrefcountbt(struct xfs_scrub *sc); +#else +# define xchk_rtbitmap xchk_nothing +# define xchk_rtsummary xchk_nothing +# define xchk_rgsuperblock xchk_nothing +# define xchk_rtrmapbt xchk_nothing +# define xchk_rtrefcountbt xchk_nothing +#endif +#ifdef CONFIG_XFS_QUOTA +int xchk_quota(struct xfs_scrub *sc); +int xchk_quotacheck(struct xfs_scrub *sc); +#else +# define xchk_quota xchk_nothing +# define xchk_quotacheck xchk_nothing +#endif +int xchk_fscounters(struct xfs_scrub *sc); +int xchk_nlinks(struct xfs_scrub *sc); + +/* cross-referencing helpers */ +void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len, const struct xfs_owner_info *oinfo); +void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len, const struct xfs_owner_info *oinfo); +void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno, + xfs_extlen_t len); +void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno, + xfs_extlen_t len); +void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno, + xfs_extlen_t len); +#ifdef CONFIG_XFS_RT +void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno, + xfs_extlen_t len); +void xchk_xref_has_no_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len); +void xchk_xref_has_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len); +void xchk_xref_is_only_rt_owned_by(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len, const struct xfs_owner_info *oinfo); +void xchk_xref_is_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len); +void xchk_xref_is_not_rt_shared(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len); +void xchk_xref_is_not_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno, + xfs_extlen_t len); +#else +# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) +# define xchk_xref_has_no_rt_owner(sc, rtbno, len) do { } while (0) +# define xchk_xref_has_rt_owner(sc, rtbno, len) do { } while (0) +# define xchk_xref_is_only_rt_owned_by(sc, bno, len, oinfo) do { } while (0) +# define xchk_xref_is_rt_cow_staging(sc, bno, len) do { } while (0) +# define xchk_xref_is_not_rt_shared(sc, bno, len) do { } while (0) +# define xchk_xref_is_not_rt_cow_staging(sc, bno, len) do { } while (0) +#endif + +#endif /* __XFS_SCRUB_SCRUB_H__ */ |
