summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/readdir.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/readdir.c')
-rw-r--r--fs/xfs/scrub/readdir.c421
1 files changed, 421 insertions, 0 deletions
diff --git a/fs/xfs/scrub/readdir.c b/fs/xfs/scrub/readdir.c
new file mode 100644
index 000000000000..01c9a2dc0f2c
--- /dev/null
+++ b/fs/xfs/scrub/readdir.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include "xfs_trans.h"
+#include "xfs_error.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/readdir.h"
+
+/* Call a function for every entry in a shortform directory. */
+STATIC int
+xchk_dir_walk_sf(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ xchk_dirent_fn dirent_fn,
+ void *priv)
+{
+ struct xfs_name name = {
+ .name = ".",
+ .len = 1,
+ .type = XFS_DIR3_FT_DIR,
+ };
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data;
+ xfs_ino_t ino;
+ xfs_dir2_dataptr_t dapos;
+ unsigned int i;
+ int error;
+
+ ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
+ ASSERT(sfp != NULL);
+
+ /* dot entry */
+ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+ geo->data_entry_offset);
+
+ error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
+ if (error)
+ return error;
+
+ /* dotdot entry */
+ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+ geo->data_entry_offset +
+ xfs_dir2_data_entsize(mp, sizeof(".") - 1));
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
+ name.name = "..";
+ name.len = 2;
+
+ error = dirent_fn(sc, dp, dapos, &name, ino, priv);
+ if (error)
+ return error;
+
+ /* iterate everything else */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+ xfs_dir2_sf_get_offset(sfep));
+ ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
+ name.name = sfep->name;
+ name.len = sfep->namelen;
+ name.type = xfs_dir2_sf_get_ftype(mp, sfep);
+
+ error = dirent_fn(sc, dp, dapos, &name, ino, priv);
+ if (error)
+ return error;
+
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
+ }
+
+ return 0;
+}
+
+/* Call a function for every entry in a block directory. */
+STATIC int
+xchk_dir_walk_block(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ xchk_dirent_fn dirent_fn,
+ void *priv)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ struct xfs_buf *bp;
+ unsigned int off, next_off, end;
+ int error;
+
+ error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
+ if (error)
+ return error;
+
+ /* Walk each directory entry. */
+ end = xfs_dir3_data_end_offset(geo, bp->b_addr);
+ for (off = geo->data_entry_offset; off < end; off = next_off) {
+ struct xfs_name name = { };
+ struct xfs_dir2_data_unused *dup = bp->b_addr + off;
+ struct xfs_dir2_data_entry *dep = bp->b_addr + off;
+ xfs_ino_t ino;
+ xfs_dir2_dataptr_t dapos;
+
+ /* Skip an empty entry. */
+ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+ next_off = off + be16_to_cpu(dup->length);
+ continue;
+ }
+
+ /* Otherwise, find the next entry and report it. */
+ next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
+ if (next_off > end)
+ break;
+
+ dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
+ ino = be64_to_cpu(dep->inumber);
+ name.name = dep->name;
+ name.len = dep->namelen;
+ name.type = xfs_dir2_data_get_ftype(mp, dep);
+
+ error = dirent_fn(sc, dp, dapos, &name, ino, priv);
+ if (error)
+ break;
+ }
+
+ xfs_trans_brelse(sc->tp, bp);
+ return error;
+}
+
+/* Read a leaf-format directory buffer. */
+STATIC int
+xchk_read_leaf_dir_buf(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_da_geometry *geo,
+ xfs_dir2_off_t *curoff,
+ struct xfs_buf **bpp)
+{
+ struct xfs_iext_cursor icur;
+ struct xfs_bmbt_irec map;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
+ xfs_dablk_t last_da;
+ xfs_dablk_t map_off;
+ xfs_dir2_off_t new_off;
+
+ *bpp = NULL;
+
+ /*
+ * Look for mapped directory blocks at or above the current offset.
+ * Truncate down to the nearest directory block to start the scanning
+ * operation.
+ */
+ last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
+ map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
+
+ if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
+ return 0;
+ if (map.br_startoff >= last_da)
+ return 0;
+ xfs_trim_extent(&map, map_off, last_da - map_off);
+
+ /* Read the directory block of that first mapping. */
+ new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
+ if (new_off > *curoff)
+ *curoff = new_off;
+
+ return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
+}
+
+/* Call a function for every entry in a leaf directory. */
+STATIC int
+xchk_dir_walk_leaf(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ xchk_dirent_fn dirent_fn,
+ void *priv)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ struct xfs_buf *bp = NULL;
+ xfs_dir2_off_t curoff = 0;
+ unsigned int offset = 0;
+ int error;
+
+ /* Iterate every directory offset in this directory. */
+ while (curoff < XFS_DIR2_LEAF_OFFSET) {
+ struct xfs_name name = { };
+ struct xfs_dir2_data_unused *dup;
+ struct xfs_dir2_data_entry *dep;
+ xfs_ino_t ino;
+ unsigned int length;
+ xfs_dir2_dataptr_t dapos;
+
+ /*
+ * If we have no buffer, or we're off the end of the
+ * current buffer, need to get another one.
+ */
+ if (!bp || offset >= geo->blksize) {
+ if (bp) {
+ xfs_trans_brelse(sc->tp, bp);
+ bp = NULL;
+ }
+
+ error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
+ &bp);
+ if (error || !bp)
+ break;
+
+ /*
+ * Find our position in the block.
+ */
+ offset = geo->data_entry_offset;
+ curoff += geo->data_entry_offset;
+ }
+
+ /* Skip an empty entry. */
+ dup = bp->b_addr + offset;
+ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+ length = be16_to_cpu(dup->length);
+ offset += length;
+ curoff += length;
+ continue;
+ }
+
+ /* Otherwise, find the next entry and report it. */
+ dep = bp->b_addr + offset;
+ length = xfs_dir2_data_entsize(mp, dep->namelen);
+
+ dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
+ ino = be64_to_cpu(dep->inumber);
+ name.name = dep->name;
+ name.len = dep->namelen;
+ name.type = xfs_dir2_data_get_ftype(mp, dep);
+
+ error = dirent_fn(sc, dp, dapos, &name, ino, priv);
+ if (error)
+ break;
+
+ /* Advance to the next entry. */
+ offset += length;
+ curoff += length;
+ }
+
+ if (bp)
+ xfs_trans_brelse(sc->tp, bp);
+ return error;
+}
+
+/*
+ * Call a function for every entry in a directory.
+ *
+ * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*.
+ */
+int
+xchk_dir_walk(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ xchk_dirent_fn dirent_fn,
+ void *priv)
+{
+ struct xfs_da_args args = {
+ .dp = dp,
+ .geo = dp->i_mount->m_dir_geo,
+ .trans = sc->tp,
+ .owner = dp->i_ino,
+ };
+ int error;
+
+ if (xfs_is_shutdown(dp->i_mount))
+ return -EIO;
+
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+ xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ switch (xfs_dir2_format(&args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
+ case XFS_DIR2_FMT_BLOCK:
+ return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
+ case XFS_DIR2_FMT_LEAF:
+ case XFS_DIR2_FMT_NODE:
+ return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
+ default:
+ return error;
+ }
+}
+
+/*
+ * Look up the inode number for an exact name in a directory.
+ *
+ * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. Names are not
+ * checked for correctness.
+ */
+int
+xchk_dir_lookup(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ const struct xfs_name *name,
+ xfs_ino_t *ino)
+{
+ struct xfs_da_args args = {
+ .dp = dp,
+ .geo = dp->i_mount->m_dir_geo,
+ .trans = sc->tp,
+ .name = name->name,
+ .namelen = name->len,
+ .filetype = name->type,
+ .hashval = xfs_dir2_hashname(dp->i_mount, name),
+ .whichfork = XFS_DATA_FORK,
+ .op_flags = XFS_DA_OP_OKNOENT,
+ .owner = dp->i_ino,
+ };
+ int error;
+
+ if (xfs_is_shutdown(dp->i_mount))
+ return -EIO;
+
+ /*
+ * A temporary directory's block headers are written with the owner
+ * set to sc->ip, so we must switch the owner here for the lookup.
+ */
+ if (dp == sc->tempip)
+ args.owner = sc->ip->i_ino;
+
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+ xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ error = xfs_dir_lookup_args(&args);
+ if (!error)
+ *ino = args.inumber;
+ return error;
+}
+
+/*
+ * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
+ * state. The caller may have a transaction, so we must use trylock for both
+ * IOLOCKs.
+ */
+static inline unsigned int
+xchk_dir_trylock_both(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
+ return 0;
+
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
+ goto parent_iolock;
+
+ xchk_ilock(sc, XFS_ILOCK_EXCL);
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ goto parent_ilock;
+
+ return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
+
+parent_ilock:
+ xchk_iunlock(sc, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+parent_iolock:
+ xchk_iunlock(sc, XFS_IOLOCK_EXCL);
+ return 0;
+}
+
+/*
+ * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
+ * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
+ * pointer link so that we can check that link.
+ *
+ * We do not know ahead of time that the directory tree is /not/ corrupt, so we
+ * cannot use the "lock two inode" functions because we do not know that there
+ * is not a racing thread trying to take the locks in opposite order. First
+ * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
+ * of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub
+ * target and @ip to synchronize with XFS.
+ *
+ * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
+ * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
+ * be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
+ * XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed.
+ */
+int
+xchk_dir_trylock_for_pptrs(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip,
+ unsigned int *lockmode)
+{
+ unsigned int nr;
+ int error = 0;
+
+ ASSERT(sc->ilock_flags == 0);
+
+ for (nr = 0; nr < HZ; nr++) {
+ *lockmode = xchk_dir_trylock_both(sc, ip);
+ if (*lockmode)
+ return 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(1);
+ }
+
+ if (sc->flags & XCHK_TRY_HARDER) {
+ xchk_set_incomplete(sc);
+ return -ETIMEDOUT;
+ }
+
+ return -EDEADLOCK;
+}