summary refs log tree commit diff
path: root/fs/xfs/xfs_itable.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_itable.c')
-rw-r--r-- fs/xfs/xfs_itable.c 1016
1 files changed, 397 insertions, 619 deletions
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b93e14b86754..2aa37a4d2706 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -1,127 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
* All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_btree.h"
#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
-#include "xfs_btree.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_health.h"
+#include "xfs_trans.h"
-STATIC int
-xfs_internal_inum(
- xfs_mount_t *mp,
- xfs_ino_t ino)
+/*
+ * Bulk Stat
+ * =========
+ *
+ * Use the inode walking functions to fill out struct xfs_bulkstat for every
+ * allocated inode, then pass the stat information to some externally provided
+ * iteration function.
+ */
+
+struct xfs_bstat_chunk {
+ bulkstat_one_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+ struct xfs_bulkstat *buf;
+};
+
+static inline bool
+want_metadir_file(
+ struct xfs_inode *ip,
+ struct xfs_ibulk *breq)
{
- return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
- (xfs_sb_version_hasquota(&mp->m_sb) &&
- xfs_is_quota_inode(&mp->m_sb, ino)));
+ return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR);
}
/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
+ * Fill out the bulkstat info for a single inode and report it somewhere.
+ *
+ * bc->breq->startino is effectively the inode cursor as we walk through the
+ * filesystem. Therefore, we update it any time we need to move the cursor
+ * forward, regardless of whether or not we're sending any bstat information
+ * back to userspace. If the inode is internal metadata or has been freed
+ * out from under us, we simply keep going.
+ *
+ * However, if any other type of error happens we want to stop right where we
+ * are so that userspace will call back with the exact number of the bad inode
+ * and we can send back an error code.
+ *
+ * Note that if the formatter tells us there's no space left in the buffer we
+ * move the cursor forward and abort the walk.
*/
-int
+STATIC int
xfs_bulkstat_one_int(
- struct xfs_mount *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_mount *mp,
+ struct mnt_idmap *idmap,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ struct xfs_bstat_chunk *bc)
{
- struct xfs_icdinode *dic; /* dinode core info pointer */
+ struct user_namespace *sb_userns = mp->m_super->s_user_ns;
struct xfs_inode *ip; /* incore inode pointer */
- struct xfs_bstat *buf; /* return buffer */
- int error = 0; /* error value */
-
- *stat = BULKSTAT_RV_NOTHING;
-
- if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
-
- buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
- if (!buf)
- return XFS_ERROR(ENOMEM);
-
- error = xfs_iget(mp, NULL, ino,
+ struct inode *inode;
+ struct xfs_bulkstat *buf = bc->buf;
+ xfs_extnum_t nextents;
+ int error = -EINVAL;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ error = xfs_iget(mp, tp, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
- if (error) {
- *stat = BULKSTAT_RV_NOTHING;
- goto out_free;
+ if (error == -ENOENT || error == -EINVAL)
+ goto out_advance;
+ if (error)
+ goto out;
+
+ /* Reload the incore unlinked list to avoid failure in inodegc. */
+ if (xfs_inode_unlinked_incomplete(ip)) {
+ error = xfs_inode_reload_unlinked_bucket(tp, ip);
+ if (error) {
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ xfs_irele(ip);
+ return error;
+ }
}
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
+ inode = VFS_I(ip);
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
- dic = &ip->i_d;
+ /*
+ * If caller wants files from the metadata directories, push out the
+ * bare minimum information for enabling scrub.
+ */
+ if (want_metadir_file(ip, bc->breq)) {
+ memset(buf, 0, sizeof(*buf));
+ buf->bs_ino = ino;
+ buf->bs_gen = inode->i_generation;
+ buf->bs_mode = inode->i_mode & S_IFMT;
+ xfs_bulkstat_health(ip, buf);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ xfs_irele(ip);
+
+ error = bc->formatter(bc->breq, buf);
+ if (!error || error == -ECANCELED)
+ goto out_advance;
+ goto out;
+ }
+
+ /* If this is a private inode, don't leak its details to userspace. */
+ if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) {
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ xfs_irele(ip);
+ error = -EINVAL;
+ goto out_advance;
+ }
/* xfs_iget returns the following without needing
* further change.
*/
- buf->bs_nlink = dic->di_nlink;
- buf->bs_projid_lo = dic->di_projid_lo;
- buf->bs_projid_hi = dic->di_projid_hi;
+ buf->bs_projectid = ip->i_projid;
buf->bs_ino = ino;
- buf->bs_mode = dic->di_mode;
- buf->bs_uid = dic->di_uid;
- buf->bs_gid = dic->di_gid;
- buf->bs_size = dic->di_size;
- buf->bs_atime.tv_sec = dic->di_atime.t_sec;
- buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
- buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
- buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
- buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
- buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
+ buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid));
+ buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid));
+ buf->bs_size = ip->i_disk_size;
+
+ buf->bs_nlink = inode->i_nlink;
+ buf->bs_atime = inode_get_atime_sec(inode);
+ buf->bs_atime_nsec = inode_get_atime_nsec(inode);
+ buf->bs_mtime = inode_get_mtime_sec(inode);
+ buf->bs_mtime_nsec = inode_get_mtime_nsec(inode);
+ buf->bs_ctime = inode_get_ctime_sec(inode);
+ buf->bs_ctime_nsec = inode_get_ctime_nsec(inode);
+ buf->bs_gen = inode->i_generation;
+ buf->bs_mode = inode->i_mode;
+
buf->bs_xflags = xfs_ip2xflags(ip);
- buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
- buf->bs_extents = dic->di_nextents;
- buf->bs_gen = dic->di_gen;
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
- buf->bs_dmevmask = dic->di_dmevmask;
- buf->bs_dmstate = dic->di_dmstate;
- buf->bs_aextents = dic->di_anextents;
- buf->bs_forkoff = XFS_IFORK_BOFF(ip);
-
- switch (dic->di_format) {
+ buf->bs_extsize_blks = ip->i_extsize;
+
+ nextents = xfs_ifork_nextents(&ip->i_df);
+ if (!(bc->breq->flags & XFS_IBULK_NREXT64))
+ buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL);
+ else
+ buf->bs_extents64 = nextents;
+
+ xfs_bulkstat_health(ip, buf);
+ buf->bs_aextents = xfs_ifork_nextents(&ip->i_af);
+ buf->bs_forkoff = xfs_inode_fork_boff(ip);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
+
+ if (xfs_has_v3inodes(mp)) {
+ buf->bs_btime = ip->i_crtime.tv_sec;
+ buf->bs_btime_nsec = ip->i_crtime.tv_nsec;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ buf->bs_cowextsize_blks = ip->i_cowextsize;
+ }
+
+ switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_DEV:
- buf->bs_rdev = ip->i_df.if_u2.if_rdev;
+ buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
buf->bs_blksize = BLKDEV_IOSIZE;
buf->bs_blocks = 0;
break;
case XFS_DINODE_FMT_LOCAL:
- case XFS_DINODE_FMT_UUID:
buf->bs_rdev = 0;
buf->bs_blksize = mp->m_sb.sb_blocksize;
buf->bs_blocks = 0;
@@ -130,577 +185,300 @@ xfs_bulkstat_one_int(
case XFS_DINODE_FMT_BTREE:
buf->bs_rdev = 0;
buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
+ buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks;
break;
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- IRELE(ip);
-
- error = formatter(buffer, ubsize, ubused, buf);
+ xfs_irele(ip);
- if (!error)
- *stat = BULKSTAT_RV_DIDONE;
+ error = bc->formatter(bc->breq, buf);
+ if (error == -ECANCELED)
+ goto out_advance;
+ if (error)
+ goto out;
- out_free:
- kmem_free(buf);
+out_advance:
+ /*
+ * Advance the cursor to the inode that comes after the one we just
+ * looked at. We want the caller to move along if the bulkstat
+ * information was copied successfully; if we tried to grab the inode
+ * but it's no longer allocated; or if it's internal metadata.
+ */
+ bc->breq->startino = ino + 1;
+out:
return error;
}
-/* Return 0 on success or positive error */
-STATIC int
-xfs_bulkstat_one_fmt(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
-{
- if (ubsize < sizeof(*buffer))
- return XFS_ERROR(ENOMEM);
- if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
- return XFS_ERROR(EFAULT);
- if (ubused)
- *ubused = sizeof(*buffer);
- return 0;
-}
-
+/* Bulkstat a single inode. */
int
xfs_bulkstat_one(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, ubused, stat);
-}
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ struct xfs_trans *tp;
+ int error;
+
+ if (breq->idmap != &nop_mnt_idmap) {
+ xfs_warn_ratelimited(breq->mp,
+ "bulkstat not supported inside of idmapped mounts.");
+ return -EINVAL;
+ }
-#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
+ ASSERT(breq->icount == 1);
-/*
- * Return stat information in bulk (by-inode) for the filesystem.
- */
-int /* error status */
-xfs_bulkstat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* last inode returned */
- int *ubcountp, /* size of buffer/count returned */
- bulkstat_one_pf formatter, /* func that'd fill a single buf */
- size_t statstruct_size, /* sizeof struct filling */
- char __user *ubuffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
-{
- xfs_agblock_t agbno=0;/* allocation group block number */
- xfs_buf_t *agbp; /* agi header buffer */
- xfs_agi_t *agi; /* agi header data */
- xfs_agino_t agino; /* inode # in allocation group */
- xfs_agnumber_t agno; /* allocation group number */
- int chunkidx; /* current index into inode chunk */
- int clustidx; /* current index into inode cluster */
- xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- int end_of_ag; /* set if we've seen the ag end */
- int error; /* error code */
- int fmterror;/* bulkstat formatter result */
- int i; /* loop index */
- int icount; /* count of inodes good in irbuf */
- size_t irbsize; /* size of irec buffer in bytes */
- xfs_ino_t ino; /* inode number (filesystem) */
- xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
- xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
- xfs_ino_t lastino; /* last inode number returned */
- int nbcluster; /* # of blocks in a cluster */
- int nicluster; /* # of inodes in a cluster */
- int nimask; /* mask for inode clusters */
- int nirbuf; /* size of irbuf */
- int rval; /* return value error code */
- int tmp; /* result value from btree calls */
- int ubcount; /* size of user's buffer */
- int ubleft; /* bytes left in user's buffer */
- char __user *ubufp; /* pointer into user's buffer */
- int ubelem; /* spaces used in user's buffer */
- int ubused; /* bytes used by formatter */
+ bc.buf = kzalloc(sizeof(struct xfs_bulkstat),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!bc.buf)
+ return -ENOMEM;
/*
- * Get the last inode value, see if there's nothing to do.
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
*/
- ino = (xfs_ino_t)*lastinop;
- lastino = ino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
- if (agno >= mp->m_sb.sb_agcount ||
- ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
- *done = 1;
- *ubcountp = 0;
- return 0;
- }
- if (!ubcountp || *ubcountp <= 0) {
- return EINVAL;
- }
- ubcount = *ubcountp; /* statstruct's */
- ubleft = ubcount * statstruct_size; /* bytes */
- *ubcountp = ubelem = 0;
- *done = 0;
- fmterror = 0;
- ubufp = ubuffer;
- nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
- mp->m_sb.sb_inopblock :
- (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
- nimask = ~(nicluster - 1);
- nbcluster = nicluster >> mp->m_sb.sb_inopblog;
- irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
- if (!irbuf)
- return ENOMEM;
-
- nirbuf = irbsize / sizeof(*irbuf);
+ tp = xfs_trans_alloc_empty(breq->mp);
+ error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
+ breq->startino, &bc);
+ xfs_trans_cancel(tp);
+ kfree(bc.buf);
/*
- * Loop over the allocation groups, starting from the last
- * inode returned; 0 means start of the allocation group.
- */
- rval = 0;
- while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
- cond_resched();
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * Skip this allocation group and go to the next one.
- */
- agno++;
- agino = 0;
- continue;
- }
- agi = XFS_BUF_TO_AGI(agbp);
- /*
- * Allocate and initialize a btree cursor for ialloc btree.
- */
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
- irbp = irbuf;
- irbufend = irbuf + nirbuf;
- end_of_ag = 0;
- /*
- * If we're returning in the middle of an allocation group,
- * we need to get the remainder of the chunk we're in.
- */
- if (agino > 0) {
- xfs_inobt_rec_incore_t r;
-
- /*
- * Lookup the inode chunk that this inode lives in.
- */
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
- &tmp);
- if (!error && /* no I/O error */
- tmp && /* lookup succeeded */
- /* got the record, should always work */
- !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
- i == 1 &&
- /* this is the right chunk */
- agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
- /* lastino was not last in chunk */
- (chunkidx = agino - r.ir_startino + 1) <
- XFS_INODES_PER_CHUNK &&
- /* there are some left allocated */
- xfs_inobt_maskn(chunkidx,
- XFS_INODES_PER_CHUNK - chunkidx) &
- ~r.ir_free) {
- /*
- * Grab the chunk record. Mark all the
- * uninteresting inodes (because they're
- * before our start point) free.
- */
- for (i = 0; i < chunkidx; i++) {
- if (XFS_INOBT_MASK(i) & ~r.ir_free)
- r.ir_freecount++;
- }
- r.ir_free |= xfs_inobt_maskn(0, chunkidx);
- irbp->ir_startino = r.ir_startino;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
- } else {
- /*
- * If any of those tests failed, bump the
- * inode number (just in case).
- */
- agino++;
- icount = 0;
- }
- /*
- * In any case, increment to the next record.
- */
- if (!error)
- error = xfs_btree_increment(cur, 0, &tmp);
- } else {
- /*
- * Start of ag. Lookup the first inode chunk.
- */
- error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
- icount = 0;
- }
- /*
- * Loop through inode btree records in this ag,
- * until we run out of inodes or space in the buffer.
- */
- while (irbp < irbufend && icount < ubcount) {
- xfs_inobt_rec_incore_t r;
-
- /*
- * Loop as long as we're unable to read the
- * inode btree.
- */
- while (error) {
- agino += XFS_INODES_PER_CHUNK;
- if (XFS_AGINO_TO_AGBNO(mp, agino) >=
- be32_to_cpu(agi->agi_length))
- break;
- error = xfs_inobt_lookup(cur, agino,
- XFS_LOOKUP_GE, &tmp);
- cond_resched();
- }
- /*
- * If ran off the end of the ag either with an error,
- * or the normal way, set end and stop collecting.
- */
- if (error) {
- end_of_ag = 1;
- break;
- }
-
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
- end_of_ag = 1;
- break;
- }
-
- /*
- * If this chunk has any allocated inodes, save it.
- * Also start read-ahead now for this chunk.
- */
- if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
- struct blk_plug plug;
- /*
- * Loop over all clusters in the next chunk.
- * Do a readahead if there are any allocated
- * inodes in that cluster.
- */
- blk_start_plug(&plug);
- agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
- for (chunkidx = 0;
- chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx += nicluster,
- agbno += nbcluster) {
- if (xfs_inobt_maskn(chunkidx, nicluster)
- & ~r.ir_free)
- xfs_btree_reada_bufs(mp, agno,
- agbno, nbcluster,
- &xfs_inode_buf_ops);
- }
- blk_finish_plug(&plug);
- irbp->ir_startino = r.ir_startino;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
- }
- /*
- * Set agino to after this chunk and bump the cursor.
- */
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- error = xfs_btree_increment(cur, 0, &tmp);
- cond_resched();
- }
- /*
- * Drop the btree buffers and the agi buffer.
- * We can't hold any of the locks these represent
- * when calling iget.
- */
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
- /*
- * Now format all the good inodes into the user's buffer.
- */
- irbufend = irbp;
- for (irbp = irbuf;
- irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
- /*
- * Now process this chunk of inodes.
- */
- for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
- XFS_BULKSTAT_UBLEFT(ubleft) &&
- irbp->ir_freecount < XFS_INODES_PER_CHUNK;
- chunkidx++, clustidx++, agino++) {
- ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
-
- ino = XFS_AGINO_TO_INO(mp, agno, agino);
- /*
- * Skip if this inode is free.
- */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- lastino = ino;
- continue;
- }
- /*
- * Count used inodes as free so we can tell
- * when the chunk is used up.
- */
- irbp->ir_freecount++;
-
- /*
- * Get the inode and fill in a single buffer.
- */
- ubused = statstruct_size;
- error = formatter(mp, ino, ubufp, ubleft,
- &ubused, &fmterror);
- if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error && error != ENOENT &&
- error != EINVAL) {
- ubleft = 0;
- rval = error;
- break;
- }
- lastino = ino;
- continue;
- }
- if (fmterror == BULKSTAT_RV_GIVEUP) {
- ubleft = 0;
- ASSERT(error);
- rval = error;
- break;
- }
- if (ubufp)
- ubufp += ubused;
- ubleft -= ubused;
- ubelem++;
- lastino = ino;
- }
-
- cond_resched();
- }
- /*
- * Set up for the next loop iteration.
- */
- if (XFS_BULKSTAT_UBLEFT(ubleft)) {
- if (end_of_ag) {
- agno++;
- agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
- } else
- break;
- }
- /*
- * Done, we're either out of filesystem or space to put the data.
+ * If we reported one inode to userspace, the formatter returns -ECANCELED
+ * because the buffer is now full. Don't leak that back to userspace.
*/
- kmem_free_large(irbuf);
- *ubcountp = ubelem;
- /*
- * Found some inodes, return them now and return the error next time.
- */
- if (ubelem)
- rval = 0;
- if (agno >= mp->m_sb.sb_agcount) {
- /*
- * If we ran out of filesystem, mark lastino as off
- * the end of the filesystem, so the next call
- * will return immediately.
- */
- *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
- *done = 1;
- } else
- *lastinop = (xfs_ino_t)lastino;
-
- return rval;
+ if (error == -ECANCELED)
+ error = 0;
+
+ return error;
+}
+
+static int
+xfs_bulkstat_iwalk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ void *data)
+{
+ struct xfs_bstat_chunk *bc = data;
+ int error;
+
+ error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data);
+ /* bulkstat just skips over missing inodes */
+ if (error == -ENOENT || error == -EINVAL)
+ return 0;
+ return error;
}
/*
- * Return stat information in bulk (by-inode) for the filesystem.
- * Special case for non-sequential one inode bulkstat.
+ * Check the incoming startino parameter.
+ *
+ * We allow any inode value that could map to physical space inside the
+ * filesystem because if there are no inodes there, bulkstat moves on to the
+ * next chunk. In other words, the magic agino value of zero takes us to the
+ * first chunk in the AG, and an agino value past the end of the AG takes us to
+ * the first chunk in the next AG.
+ *
+ * Therefore we can end early if the requested inode is beyond the end of the
+ * filesystem or doesn't map properly.
*/
-int /* error status */
-xfs_bulkstat_single(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* inode to return */
- char __user *buffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
+static inline bool
+xfs_bulkstat_already_done(
+ struct xfs_mount *mp,
+ xfs_ino_t startino)
+{
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino);
+
+ return agno >= mp->m_sb.sb_agcount ||
+ startino != XFS_AGINO_TO_INO(mp, agno, agino);
+}
+
+/* Return stat information in bulk (by-inode) for the filesystem. */
+int
+xfs_bulkstat(
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
{
- int count; /* count value for bulkstat call */
- int error; /* return value */
- xfs_ino_t ino; /* filesystem inode number */
- int res; /* result from bs1 */
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ struct xfs_trans *tp;
+ int error;
+
+ if (breq->idmap != &nop_mnt_idmap) {
+ xfs_warn_ratelimited(breq->mp,
+ "bulkstat not supported inside of idmapped mounts.");
+ return -EINVAL;
+ }
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
+
+ bc.buf = kzalloc(sizeof(struct xfs_bulkstat),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!bc.buf)
+ return -ENOMEM;
/*
- * note that requesting valid inode numbers which are not allocated
- * to inodes will most likely cause xfs_imap_to_bp to generate warning
- * messages about bad magic numbers. This is ok. The fact that
- * the inode isn't actually an inode is handled by the
- * error check below. Done this way to make the usual case faster
- * at the expense of the error case.
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
*/
+ tp = xfs_trans_alloc_empty(breq->mp);
+ error = xfs_iwalk(breq->mp, tp, breq->startino, breq->iwalk_flags,
+ xfs_bulkstat_iwalk, breq->icount, &bc);
+ xfs_trans_cancel(tp);
+ kfree(bc.buf);
- ino = (xfs_ino_t)*lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
- if (error) {
- /*
- * Special case way failed, do it the "long" way
- * to see if that works.
- */
- (*lastinop)--;
- count = 1;
- if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), buffer, done))
- return error;
- if (count == 0 || (xfs_ino_t)*lastinop != ino)
- return error == EFSCORRUPTED ?
- XFS_ERROR(EINVAL) : error;
- else
- return 0;
- }
- *done = 0;
- return 0;
+ /*
+ * We found some inodes, so clear the error status and return them.
+ * The startino cursor will point directly at the inode that triggered
+ * any error that occurred, so on the next call the error will be
+ * triggered again and propagated to userspace as there will be no
+ * formatted inodes in the buffer.
+ */
+ if (breq->ocount > 0)
+ error = 0;
+
+ return error;
}
-int
-xfs_inumbers_fmt(
- void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written) /* # of bytes written */
+/* Convert bulkstat (v5) to bstat (v1). */
+void
+xfs_bulkstat_to_bstat(
+ struct xfs_mount *mp,
+ struct xfs_bstat *bs1,
+ const struct xfs_bulkstat *bstat)
{
- if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
- return -EFAULT;
- *written = count * sizeof(*buffer);
- return 0;
+ /* memset is needed here because of padding holes in the structure. */
+ memset(bs1, 0, sizeof(struct xfs_bstat));
+ bs1->bs_ino = bstat->bs_ino;
+ bs1->bs_mode = bstat->bs_mode;
+ bs1->bs_nlink = bstat->bs_nlink;
+ bs1->bs_uid = bstat->bs_uid;
+ bs1->bs_gid = bstat->bs_gid;
+ bs1->bs_rdev = bstat->bs_rdev;
+ bs1->bs_blksize = bstat->bs_blksize;
+ bs1->bs_size = bstat->bs_size;
+ bs1->bs_atime.tv_sec = bstat->bs_atime;
+ bs1->bs_mtime.tv_sec = bstat->bs_mtime;
+ bs1->bs_ctime.tv_sec = bstat->bs_ctime;
+ bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
+ bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
+ bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
+ bs1->bs_blocks = bstat->bs_blocks;
+ bs1->bs_xflags = bstat->bs_xflags;
+ bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
+ bs1->bs_extents = bstat->bs_extents;
+ bs1->bs_gen = bstat->bs_gen;
+ bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
+ bs1->bs_forkoff = bstat->bs_forkoff;
+ bs1->bs_projid_hi = bstat->bs_projectid >> 16;
+ bs1->bs_sick = bstat->bs_sick;
+ bs1->bs_checked = bstat->bs_checked;
+ bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
+ bs1->bs_dmevmask = 0;
+ bs1->bs_dmstate = 0;
+ bs1->bs_aextents = bstat->bs_aextents;
+}
+
+struct xfs_inumbers_chunk {
+ inumbers_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+};
+
+/*
+ * INUMBERS
+ * ========
+ * This is how we export inode btree records to userspace, so that XFS tools
+ * can figure out where inodes are allocated.
+ */
+
+/*
+ * Format the inode group structure and report it somewhere.
+ *
+ * Similar to xfs_bulkstat_one_int, startino is the inode cursor as we walk
+ * through the filesystem so we move it forward unless there was a runtime
+ * error. If the formatter tells us the buffer is now full we also move the
+ * cursor forward and abort the walk.
+ */
+STATIC int
+xfs_inumbers_walk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ const struct xfs_inobt_rec_incore *irec,
+ void *data)
+{
+ struct xfs_inumbers inogrp = {
+ .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
+ .xi_alloccount = irec->ir_count - irec->ir_freecount,
+ .xi_allocmask = ~irec->ir_free,
+ .xi_version = XFS_INUMBERS_VERSION_V5,
+ };
+ struct xfs_inumbers_chunk *ic = data;
+ int error;
+
+ error = ic->formatter(ic->breq, &inogrp);
+ if (error && error != -ECANCELED)
+ return error;
+
+ ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
+ XFS_INODES_PER_CHUNK;
+ return error;
}
/*
* Return inode number table for the filesystem.
*/
-int /* error status */
+int
xfs_inumbers(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastino, /* last inode returned */
- int *count, /* size of buffer/count returned */
- void __user *ubuffer,/* buffer with inode descriptions */
- inumbers_fmt_pf formatter)
+ struct xfs_ibulk *breq,
+ inumbers_fmt_pf formatter)
{
- xfs_buf_t *agbp;
- xfs_agino_t agino;
- xfs_agnumber_t agno;
- int bcount;
- xfs_inogrp_t *buffer;
- int bufidx;
- xfs_btree_cur_t *cur;
- int error;
- xfs_inobt_rec_incore_t r;
- int i;
- xfs_ino_t ino;
- int left;
- int tmp;
-
- ino = (xfs_ino_t)*lastino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
- left = *count;
- *count = 0;
- bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
- buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
- error = bufidx = 0;
- cur = NULL;
- agbp = NULL;
- while (left > 0 && agno < mp->m_sb.sb_agcount) {
- if (agbp == NULL) {
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * If we can't read the AGI of this ag,
- * then just skip to the next one.
- */
- ASSERT(cur == NULL);
- agbp = NULL;
- agno++;
- agino = 0;
- continue;
- }
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
- &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * Move up the last inode in the current
- * chunk. The lookup_ge will always get
- * us the first inode in the next chunk.
- */
- agino += XFS_INODES_PER_CHUNK - 1;
- continue;
- }
- }
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
- xfs_buf_relse(agbp);
- agbp = NULL;
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- cur = NULL;
- agno++;
- agino = 0;
- continue;
- }
- agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
- buffer[bufidx].xi_startino =
- XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
- buffer[bufidx].xi_alloccount =
- XFS_INODES_PER_CHUNK - r.ir_freecount;
- buffer[bufidx].xi_allocmask = ~r.ir_free;
- bufidx++;
- left--;
- if (bufidx == bcount) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written)) {
- error = XFS_ERROR(EFAULT);
- break;
- }
- ubuffer += written;
- *count += bufidx;
- bufidx = 0;
- }
- if (left) {
- error = xfs_btree_increment(cur, 0, &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * The agino value has already been bumped.
- * Just try to skip up to it.
- */
- agino += XFS_INODES_PER_CHUNK;
- continue;
- }
- }
- }
- if (!error) {
- if (bufidx) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written))
- error = XFS_ERROR(EFAULT);
- else
- *count += bufidx;
- }
- *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
- }
- kmem_free(buffer);
- if (cur)
- xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
- XFS_BTREE_NOERROR));
- if (agbp)
- xfs_buf_relse(agbp);
+ struct xfs_inumbers_chunk ic = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ struct xfs_trans *tp;
+ int error = 0;
+
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
+
+ /*
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+ tp = xfs_trans_alloc_empty(breq->mp);
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->iwalk_flags,
+ xfs_inumbers_walk, breq->icount, &ic);
+ xfs_trans_cancel(tp);
+
+ /*
+ * We found some inode groups, so clear the error status and return
+ * them. The startino cursor will point directly at the inode that
+ * triggered any error that occurred, so on the next call the error
+ * will be triggered again and propagated to userspace as there will be
+ * no formatted inode groups in the buffer.
+ */
+ if (breq->ocount > 0)
+ error = 0;
+
return error;
}
+
+/* Convert an inumbers (v5) struct to an inogrp (v1) struct. */
+void
+xfs_inumbers_to_inogrp(
+ struct xfs_inogrp *ig1,
+ const struct xfs_inumbers *ig)
+{
+ /* memset is needed here because of padding holes in the structure. */
+ memset(ig1, 0, sizeof(struct xfs_inogrp));
+ ig1->xi_startino = ig->xi_startino;
+ ig1->xi_alloccount = ig->xi_alloccount;
+ ig1->xi_allocmask = ig->xi_allocmask;
+}