summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_ialloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_ialloc.c')
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c905
1 files changed, 568 insertions, 337 deletions
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 5118dedf9267..d97295eaebe6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -27,6 +27,7 @@
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
+#include "xfs_health.h"
/*
* Lookup a record by ino in the btree given by cur.
@@ -95,6 +96,61 @@ xfs_inobt_btrec_to_irec(
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
}
+/* Compute the freecount of an incore inode record. */
+uint8_t
+xfs_inobt_rec_freecount(
+ const struct xfs_inobt_rec_incore *irec)
+{
+ uint64_t realfree = irec->ir_free;
+
+ if (xfs_inobt_issparse(irec->ir_holemask))
+ realfree &= xfs_inobt_irec_to_allocmask(irec);
+ return hweight64(realfree);
+}
+
+/* Simple checks for inode records. */
+xfs_failaddr_t
+xfs_inobt_check_irec(
+ struct xfs_perag *pag,
+ const struct xfs_inobt_rec_incore *irec)
+{
+ /* Record has to be properly aligned within the AG. */
+ if (!xfs_verify_agino(pag, irec->ir_startino))
+ return __this_address;
+ if (!xfs_verify_agino(pag,
+ irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
+ return __this_address;
+ if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
+ irec->ir_count > XFS_INODES_PER_CHUNK)
+ return __this_address;
+ if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
+ return __this_address;
+
+ if (xfs_inobt_rec_freecount(irec) != irec->ir_freecount)
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_inobt_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+
+ xfs_warn(mp,
+ "%sbt record corruption in AG %d detected at %pS!",
+ cur->bc_ops->name, cur->bc_group->xg_gno, fa);
+ xfs_warn(mp,
+"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
+ irec->ir_startino, irec->ir_count, irec->ir_freecount,
+ irec->ir_free, irec->ir_holemask);
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+}
+
/*
* Get the data from the pointed-to record.
*/
@@ -106,43 +162,19 @@ xfs_inobt_get_rec(
{
struct xfs_mount *mp = cur->bc_mp;
union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
int error;
- uint64_t realfree;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || *stat == 0)
return error;
xfs_inobt_btrec_to_irec(mp, rec, irec);
-
- if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino))
- goto out_bad_rec;
- if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
- irec->ir_count > XFS_INODES_PER_CHUNK)
- goto out_bad_rec;
- if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
- goto out_bad_rec;
-
- /* if there are no holes, return the first available offset */
- if (!xfs_inobt_issparse(irec->ir_holemask))
- realfree = irec->ir_free;
- else
- realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
- if (hweight64(realfree) != irec->ir_freecount)
- goto out_bad_rec;
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), irec);
+ if (fa)
+ return xfs_inobt_complain_bad_rec(cur, fa, irec);
return 0;
-
-out_bad_rec:
- xfs_warn(mp,
- "%s Inode BTree record corruption in AG %d detected!",
- cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
- cur->bc_ag.pag->pag_agno);
- xfs_warn(mp,
-"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
- irec->ir_startino, irec->ir_count, irec->ir_freecount,
- irec->ir_free, irec->ir_holemask);
- return -EFSCORRUPTED;
}
/*
@@ -169,20 +201,22 @@ xfs_inobt_insert_rec(
*/
STATIC int
xfs_inobt_insert(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
xfs_agino_t newino,
xfs_agino_t newlen,
- xfs_btnum_t btnum)
+ bool is_finobt)
{
struct xfs_btree_cur *cur;
xfs_agino_t thisino;
int i;
int error;
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum);
+ if (is_finobt)
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
+ else
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
for (thisino = newino;
thisino < newino + newlen;
@@ -241,8 +275,10 @@ xfs_check_agi_freecount(
}
} while (i == 1);
- if (!xfs_is_shutdown(cur->bc_mp))
- ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
+ if (!xfs_is_shutdown(cur->bc_mp)) {
+ ASSERT(freecount ==
+ to_perag(cur->bc_group)->pagi_freecount);
+ }
}
return 0;
}
@@ -328,7 +364,7 @@ xfs_ialloc_inode_init(
(j * M_IGEO(mp)->blocks_per_cluster));
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
- XBF_UNMAPPED, &fbuf);
+ 0, &fbuf);
if (error)
return error;
@@ -498,36 +534,32 @@ __xfs_inobt_rec_merge(
}
/*
- * Insert a new sparse inode chunk into the associated inode btree. The inode
- * record for the sparse chunk is pre-aligned to a startino that should match
- * any pre-existing sparse inode record in the tree. This allows sparse chunks
- * to fill over time.
+ * Insert a new sparse inode chunk into the associated inode allocation btree.
+ * The inode record for the sparse chunk is pre-aligned to a startino that
+ * should match any pre-existing sparse inode record in the tree. This allows
+ * sparse chunks to fill over time.
*
- * This function supports two modes of handling preexisting records depending on
- * the merge flag. If merge is true, the provided record is merged with the
+ * If no preexisting record exists, the provided record is inserted.
+ * If there is a preexisting record, the provided record is merged with the
* existing record and updated in place. The merged record is returned in nrec.
- * If merge is false, an existing record is replaced with the provided record.
- * If no preexisting record exists, the provided record is always inserted.
*
* It is considered corruption if a merge is requested and not possible. Given
* the sparse inode alignment constraints, this should never happen.
*/
STATIC int
xfs_inobt_insert_sprec(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
- int btnum,
- struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
- bool merge) /* merge or replace */
+ struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
int error;
int i;
struct xfs_inobt_rec_incore rec;
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
/* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
@@ -541,6 +573,7 @@ xfs_inobt_insert_sprec(
if (error)
goto error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -549,45 +582,42 @@ xfs_inobt_insert_sprec(
}
/*
- * A record exists at this startino. Merge or replace the record
- * depending on what we've been asked to do.
+ * A record exists at this startino. Merge the records.
*/
- if (merge) {
- error = xfs_inobt_get_rec(cur, &rec, &i);
- if (error)
- goto error;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto error;
- }
- if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
- error = -EFSCORRUPTED;
- goto error;
- }
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ goto error;
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
- /*
- * This should never fail. If we have coexisting records that
- * cannot merge, something is seriously wrong.
- */
- if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
- error = -EFSCORRUPTED;
- goto error;
- }
+ /*
+ * This should never fail. If we have coexisting records that
+ * cannot merge, something is seriously wrong.
+ */
+ if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
- trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
- rec.ir_holemask, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_pre(pag, &rec, nrec);
- /* merge to nrec to output the updated record */
- __xfs_inobt_rec_merge(nrec, &rec);
+ /* merge to nrec to output the updated record */
+ __xfs_inobt_rec_merge(nrec, &rec);
- trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_post(pag, nrec);
- error = xfs_inobt_rec_check_count(mp, nrec);
- if (error)
- goto error;
- }
+ error = xfs_inobt_rec_check_count(mp, nrec);
+ if (error)
+ goto error;
error = xfs_inobt_update(cur, nrec);
if (error)
@@ -602,6 +632,59 @@ error:
}
/*
+ * Insert a new sparse inode chunk into the free inode btree. The inode
+ * record for the sparse chunk is pre-aligned to a startino that should match
+ * any pre-existing sparse inode record in the tree. This allows sparse chunks
+ * to fill over time.
+ *
+ * The new record is always inserted, overwriting a pre-existing record if
+ * there is one.
+ */
+STATIC int
+xfs_finobt_insert_sprec(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */
+{
+ struct xfs_mount *mp = pag_mount(pag);
+ struct xfs_btree_cur *cur;
+ int error;
+ int i;
+
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
+
+ /* the new record is pre-aligned so we know where to look */
+ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ goto error;
+ /* if nothing there, insert a new record and return */
+ if (i == 0) {
+ error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
+ nrec->ir_count, nrec->ir_freecount,
+ nrec->ir_free, &i);
+ if (error)
+ goto error;
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ } else {
+ error = xfs_inobt_update(cur, nrec);
+ if (error)
+ goto error;
+ }
+
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+error:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+
+/*
* Allocate new inodes in the allocation group specified by agbp. Returns 0 if
* inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
* the caller knows it can try another AG, a hard -ENOSPC when over the maximum
@@ -609,9 +692,9 @@ error:
*/
STATIC int
xfs_ialloc_ag_alloc(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
- struct xfs_buf *agbp,
- struct xfs_perag *pag)
+ struct xfs_buf *agbp)
{
struct xfs_agi *agi;
struct xfs_alloc_arg args;
@@ -631,6 +714,7 @@ xfs_ialloc_ag_alloc(
args.mp = tp->t_mountp;
args.fsbno = NULLFSBLOCK;
args.oinfo = XFS_RMAP_OINFO_INODES;
+ args.pag = pag;
#ifdef DEBUG
/* randomly do sparse inode allocations */
@@ -662,8 +746,6 @@ xfs_ialloc_ag_alloc(
goto sparse_alloc;
if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) {
- args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
- args.type = XFS_ALLOCTYPE_THIS_BNO;
args.prod = 1;
/*
@@ -684,7 +766,9 @@ xfs_ialloc_ag_alloc(
/* Allow space for the inode btree to split. */
args.minleft = igeo->inobt_maxlevels;
- if ((error = xfs_alloc_vextent(&args)))
+ error = xfs_alloc_vextent_exact_bno(&args,
+ xfs_agbno_to_fsb(pag, args.agbno));
+ if (error)
return error;
/*
@@ -717,22 +801,17 @@ xfs_ialloc_ag_alloc(
} else
args.alignment = igeo->cluster_align;
/*
- * Need to figure out where to allocate the inode blocks.
- * Ideally they should be spaced out through the a.g.
- * For now, just allocate blocks up front.
- */
- args.agbno = be32_to_cpu(agi->agi_root);
- args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
- /*
* Allocate a fixed-size extent of inodes.
*/
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.prod = 1;
/*
* Allow space for the inode btree to split.
*/
args.minleft = igeo->inobt_maxlevels;
- if ((error = xfs_alloc_vextent(&args)))
+ error = xfs_alloc_vextent_near_bno(&args,
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
+ if (error)
return error;
}
@@ -741,11 +820,11 @@ xfs_ialloc_ag_alloc(
* alignment.
*/
if (isaligned && args.fsbno == NULLFSBLOCK) {
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.agbno = be32_to_cpu(agi->agi_root);
- args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
args.alignment = igeo->cluster_align;
- if ((error = xfs_alloc_vextent(&args)))
+ error = xfs_alloc_vextent_near_bno(&args,
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
+ if (error)
return error;
}
@@ -757,9 +836,6 @@ xfs_ialloc_ag_alloc(
igeo->ialloc_min_blks < igeo->ialloc_blks &&
args.fsbno == NULLFSBLOCK) {
sparse_alloc:
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.agbno = be32_to_cpu(agi->agi_root);
- args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno);
args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1;
@@ -777,11 +853,14 @@ sparse_alloc:
* the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.max_agbno = round_down(xfs_ag_block_count(args.mp,
+ pag_agno(pag)),
args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks;
- error = xfs_alloc_vextent(&args);
+ error = xfs_alloc_vextent_near_bno(&args,
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
@@ -804,7 +883,7 @@ sparse_alloc:
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag),
args.agbno, args.len, get_random_u32());
if (error)
@@ -831,13 +910,11 @@ sparse_alloc:
* if necessary. If a merge does occur, rec is updated to the
* merged record.
*/
- error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag,
- XFS_BTNUM_INO, &rec, true);
+ error = xfs_inobt_insert_sprec(pag, tp, agbp, &rec);
if (error == -EFSCORRUPTED) {
xfs_alert(args.mp,
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
- XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
- rec.ir_startino),
+ xfs_agino_to_ino(pag, rec.ir_startino),
rec.ir_holemask, rec.ir_count);
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
}
@@ -856,21 +933,19 @@ sparse_alloc:
* existing record with this one.
*/
if (xfs_has_finobt(args.mp)) {
- error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag,
- XFS_BTNUM_FINO, &rec, false);
+ error = xfs_finobt_insert_sprec(pag, tp, agbp, &rec);
if (error)
return error;
}
} else {
/* full chunk - insert new records to both btrees */
- error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen,
- XFS_BTNUM_INO);
+ error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, false);
if (error)
return error;
if (xfs_has_finobt(args.mp)) {
- error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino,
- newlen, XFS_BTNUM_FINO);
+ error = xfs_inobt_insert(pag, tp, agbp, newino,
+ newlen, true);
if (error)
return error;
}
@@ -923,8 +998,10 @@ xfs_ialloc_next_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
}
return 0;
@@ -948,8 +1025,10 @@ xfs_ialloc_get_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
}
return 0;
@@ -977,13 +1056,40 @@ xfs_inobt_first_free_inode(
}
/*
+ * If this AG has corrupt inodes, check if allocating this inode would fail
+ * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
+ * somewhere else.
+ */
+static int
+xfs_dialloc_check_ino(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ xfs_ino_t ino)
+{
+ struct xfs_imap imap;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xfs_imap(pag, tp, ino, &imap, 0);
+ if (error)
+ return -EAGAIN;
+
+ error = xfs_imap_to_bp(pag_mount(pag), tp, &imap, &bp);
+ if (error)
+ return -EAGAIN;
+
+ xfs_trans_brelse(tp, bp);
+ return 0;
+}
+
+/*
* Allocate an inode using the inobt-only algorithm.
*/
STATIC int
xfs_dialloc_ag_inobt(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
xfs_ino_t parent,
xfs_ino_t *inop)
{
@@ -999,12 +1105,12 @@ xfs_dialloc_ag_inobt(
int i, j;
int searchdistance = 10;
- ASSERT(pag->pagi_init);
- ASSERT(pag->pagi_inodeok);
+ ASSERT(xfs_perag_initialised_agi(pag));
+ ASSERT(xfs_perag_allows_inodes(pag));
ASSERT(pag->pagi_freecount > 0);
restart_pagno:
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -1019,7 +1125,7 @@ xfs_dialloc_ag_inobt(
/*
* If in the same AG as the parent, try to get near the parent.
*/
- if (pagno == pag->pag_agno) {
+ if (pagno == pag_agno(pag)) {
int doneleft; /* done, to the left */
int doneright; /* done, to the right */
@@ -1027,6 +1133,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1035,6 +1142,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, j != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1193,6 +1301,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1202,6 +1311,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1211,6 +1321,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1222,7 +1333,14 @@ alloc_inode:
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error0;
+ }
+
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
error = xfs_inobt_update(cur, &rec);
@@ -1271,8 +1389,10 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(lcur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(lcur);
return -EFSCORRUPTED;
+ }
/*
* See if we've landed in the parent inode record. The finobt
@@ -1296,12 +1416,14 @@ xfs_dialloc_ag_finobt_near(
if (error)
goto error_rcur;
if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
+ xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED;
goto error_rcur;
}
}
if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
+ xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED;
goto error_rcur;
}
@@ -1357,8 +1479,10 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return 0;
}
}
@@ -1369,14 +1493,18 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return 0;
}
@@ -1398,14 +1526,18 @@ xfs_dialloc_ag_update_inobt(
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
@@ -1414,8 +1546,10 @@ xfs_dialloc_ag_update_inobt(
if (XFS_IS_CORRUPT(cur->bc_mp,
rec.ir_free != frec->ir_free ||
- rec.ir_freecount != frec->ir_freecount))
+ rec.ir_freecount != frec->ir_freecount)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return xfs_inobt_update(cur, &rec);
}
@@ -1429,9 +1563,9 @@ xfs_dialloc_ag_update_inobt(
*/
static int
xfs_dialloc_ag(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
xfs_ino_t parent,
xfs_ino_t *inop)
{
@@ -1448,7 +1582,7 @@ xfs_dialloc_ag(
int i;
if (!xfs_has_finobt(mp))
- return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop);
+ return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
@@ -1457,7 +1591,7 @@ xfs_dialloc_ag(
if (!pagino)
pagino = be32_to_cpu(agi->agi_newino);
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(cur);
if (error)
@@ -1468,7 +1602,7 @@ xfs_dialloc_ag(
* parent. If so, find the closest available inode to the parent. If
* not, consider the agi hint or find the first free inode in the AG.
*/
- if (pag->pag_agno == pagno)
+ if (pag_agno(pag) == pagno)
error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
else
error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
@@ -1480,7 +1614,13 @@ xfs_dialloc_ag(
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error_cur;
+ }
/*
* Modify or remove the finobt record.
@@ -1500,7 +1640,7 @@ xfs_dialloc_ag(
* the original freecount. If all is well, make the equivalent update to
* the inobt using the finobt record and offset information.
*/
- icur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+ icur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(icur);
if (error)
@@ -1577,25 +1717,10 @@ xfs_dialloc_roll(
return error;
}
-static xfs_agnumber_t
-xfs_ialloc_next_ag(
- xfs_mount_t *mp)
-{
- xfs_agnumber_t agno;
-
- spin_lock(&mp->m_agirotor_lock);
- agno = mp->m_agirotor;
- if (++mp->m_agirotor >= mp->m_maxagi)
- mp->m_agirotor = 0;
- spin_unlock(&mp->m_agirotor_lock);
-
- return agno;
-}
-
static bool
xfs_dialloc_good_ag(
- struct xfs_trans *tp,
struct xfs_perag *pag,
+ struct xfs_trans *tp,
umode_t mode,
int flags,
bool ok_alloc)
@@ -1606,11 +1731,13 @@ xfs_dialloc_good_ag(
int needspace;
int error;
- if (!pag->pagi_inodeok)
+ if (!pag)
+ return false;
+ if (!xfs_perag_allows_inodes(pag))
return false;
- if (!pag->pagi_init) {
- error = xfs_ialloc_read_agi(pag, tp, NULL);
+ if (!xfs_perag_initialised_agi(pag)) {
+ error = xfs_ialloc_read_agi(pag, tp, 0, NULL);
if (error)
return false;
}
@@ -1620,7 +1747,7 @@ xfs_dialloc_good_ag(
if (!ok_alloc)
return false;
- if (!pag->pagf_init) {
+ if (!xfs_perag_initialised_agf(pag)) {
error = xfs_alloc_read_agf(pag, tp, flags, NULL);
if (error)
return false;
@@ -1665,8 +1792,8 @@ xfs_dialloc_good_ag(
static int
xfs_dialloc_try_ag(
- struct xfs_trans **tpp,
struct xfs_perag *pag,
+ struct xfs_trans **tpp,
xfs_ino_t parent,
xfs_ino_t *new_ino,
bool ok_alloc)
@@ -1679,7 +1806,7 @@ xfs_dialloc_try_ag(
* Then read in the AGI buffer and recheck with the AGI buffer
* lock held.
*/
- error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
+ error = xfs_ialloc_read_agi(pag, *tpp, 0, &agbp);
if (error)
return error;
@@ -1689,7 +1816,7 @@ xfs_dialloc_try_ag(
goto out_release;
}
- error = xfs_ialloc_ag_alloc(*tpp, agbp, pag);
+ error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
if (error < 0)
goto out_release;
@@ -1705,7 +1832,7 @@ xfs_dialloc_try_ag(
}
/* Allocate an inode in the found AG */
- error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino);
+ error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
if (!error)
*new_ino = ino;
return error;
@@ -1716,6 +1843,40 @@ out_release:
}
/*
+ * Pick an AG for the new inode.
+ *
+ * Directories, symlinks, and regular files frequently allocate at least one
+ * block, so factor that potential expansion when we examine whether an AG has
+ * enough space for file creation. Try to keep metadata files all in the same
+ * AG.
+ */
+static inline xfs_agnumber_t
+xfs_dialloc_pick_ag(
+ struct xfs_mount *mp,
+ struct xfs_inode *dp,
+ umode_t mode)
+{
+ xfs_agnumber_t start_agno;
+
+ if (!dp)
+ return 0;
+ if (xfs_is_metadir_inode(dp)) {
+ if (mp->m_sb.sb_logstart)
+ return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+ return 0;
+ }
+
+ if (S_ISDIR(mode))
+ return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi;
+
+ start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino);
+ if (start_agno >= mp->m_maxagi)
+ start_agno = 0;
+
+ return start_agno;
+}
+
+/*
* Allocate an on-disk inode.
*
* Mode is used to tell whether the new inode is a directory and hence where to
@@ -1726,32 +1887,23 @@ out_release:
int
xfs_dialloc(
struct xfs_trans **tpp,
- xfs_ino_t parent,
- umode_t mode,
+ const struct xfs_icreate_args *args,
xfs_ino_t *new_ino)
{
struct xfs_mount *mp = (*tpp)->t_mountp;
- xfs_agnumber_t agno;
- int error = 0;
- xfs_agnumber_t start_agno;
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_ino_t ino = NULLFSINO;
+ xfs_ino_t parent = args->pip ? args->pip->i_ino : 0;
+ xfs_agnumber_t agno;
+ xfs_agnumber_t start_agno;
+ umode_t mode = args->mode & S_IFMT;
bool ok_alloc = true;
+ bool low_space = false;
int flags;
- xfs_ino_t ino;
+ int error = 0;
- /*
- * Directories, symlinks, and regular files frequently allocate at least
- * one block, so factor that potential expansion when we examine whether
- * an AG has enough space for file creation.
- */
- if (S_ISDIR(mode))
- start_agno = xfs_ialloc_next_ag(mp);
- else {
- start_agno = XFS_INO_TO_AGNO(mp, parent);
- if (start_agno >= mp->m_maxagi)
- start_agno = 0;
- }
+ start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode);
/*
* If we have already hit the ceiling of inode blocks then clear
@@ -1768,41 +1920,70 @@ xfs_dialloc(
}
/*
+ * If we are near to ENOSPC, we want to prefer allocation from AGs that
+ * have free inodes in them rather than use up free space allocating new
+ * inode chunks. Hence we turn off allocation for the first non-blocking
+ * pass through the AGs if we are near ENOSPC to consume free inodes
+ * that we can immediately allocate, but then we allow allocation on the
+ * second pass if we fail to find an AG with free inodes in it.
+ */
+ if (xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) <
+ mp->m_low_space[XFS_LOWSP_1_PCNT]) {
+ ok_alloc = false;
+ low_space = true;
+ }
+
+ /*
* Loop until we find an allocation group that either has free inodes
* or in which we can allocate some inodes. Iterate through the
* allocation groups upward, wrapping at the end.
*/
- agno = start_agno;
flags = XFS_ALLOC_FLAG_TRYLOCK;
- for (;;) {
- pag = xfs_perag_get(mp, agno);
- if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) {
- error = xfs_dialloc_try_ag(tpp, pag, parent,
+retry:
+ for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
+ if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
+ error = xfs_dialloc_try_ag(pag, tpp, parent,
&ino, ok_alloc);
if (error != -EAGAIN)
break;
+ error = 0;
}
if (xfs_is_shutdown(mp)) {
error = -EFSCORRUPTED;
break;
}
- if (++agno == mp->m_maxagi)
- agno = 0;
- if (agno == start_agno) {
- if (!flags) {
- error = -ENOSPC;
- break;
- }
+ }
+ if (pag)
+ xfs_perag_rele(pag);
+ if (error)
+ return error;
+ if (ino == NULLFSINO) {
+ if (flags) {
flags = 0;
+ if (low_space)
+ ok_alloc = true;
+ goto retry;
}
- xfs_perag_put(pag);
+ return -ENOSPC;
}
- if (!error)
- *new_ino = ino;
- xfs_perag_put(pag);
- return error;
+ /*
+ * Protect against obviously corrupt allocation btree records. Later
+ * xfs_iget checks will catch re-allocation of other active in-memory
+ * and on-disk inodes. If we don't catch reallocating the parent inode
+ * here we will deadlock in xfs_iget() so we have to do these checks
+ * first.
+ */
+ if (ino == parent || !xfs_verify_dir_ino(mp, ino)) {
+ xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
+ xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
+ XFS_SICK_AG_INOBT);
+ return -EFSCORRUPTED;
+ }
+
+ *new_ino = ino;
+ return 0;
}
/*
@@ -1810,10 +1991,10 @@ xfs_dialloc(
* might be sparse and only free the regions that are allocated as part of the
* chunk.
*/
-STATIC void
+static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
struct xfs_inobt_rec_incore *rec)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -1827,10 +2008,9 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
- return;
+ return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno),
+ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE, 0);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1847,6 +2027,8 @@ xfs_difree_inode_chunk(
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1;
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ int error;
+
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit);
/*
@@ -1872,8 +2054,11 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, agbno),
+ contigblk, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE, 0);
+ if (error)
+ return error;
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
@@ -1881,18 +2066,19 @@ xfs_difree_inode_chunk(
next:
nextbit++;
}
+ return 0;
}
STATIC int
xfs_difree_inobt(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
xfs_agino_t agino,
struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_agi *agi = agbp->b_addr;
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
@@ -1907,7 +2093,7 @@ xfs_difree_inobt(
/*
* Initialize the cursor.
*/
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(cur);
if (error)
@@ -1922,6 +2108,7 @@ xfs_difree_inobt(
goto error0;
}
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1932,6 +2119,7 @@ xfs_difree_inobt(
goto error0;
}
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1952,13 +2140,10 @@ xfs_difree_inobt(
* remove the chunk if the block size is large enough for multiple inode
* chunks (that might not be free).
*/
- if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
- struct xfs_perag *pag = agbp->b_pag;
-
xic->deleted = true;
- xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
- rec.ir_startino);
+ xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
@@ -1981,7 +2166,9 @@ xfs_difree_inobt(
goto error0;
}
- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag, &rec);
+ if (error)
+ goto error0;
} else {
xic->deleted = false;
@@ -2019,20 +2206,20 @@ error0:
*/
STATIC int
xfs_difree_finobt(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- struct xfs_perag *pag,
xfs_agino_t agino,
struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
int offset = agino - ibtrec->ir_startino;
int error;
int i;
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
@@ -2044,6 +2231,7 @@ xfs_difree_finobt(
* something is out of sync.
*/
if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2070,6 +2258,7 @@ xfs_difree_finobt(
if (error)
goto error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2080,6 +2269,7 @@ xfs_difree_finobt(
if (XFS_IS_CORRUPT(mp,
rec.ir_free != ibtrec->ir_free ||
rec.ir_freecount != ibtrec->ir_freecount)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2096,7 +2286,7 @@ xfs_difree_finobt(
* enough for multiple chunks. Leave the finobt record to remain in sync
* with the inobt.
*/
- if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
+ if (rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
error = xfs_btree_delete(cur, &i);
if (error)
@@ -2145,31 +2335,31 @@ xfs_difree(
/*
* Break up inode number into its components.
*/
- if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
- xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
- __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
+ if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) {
+ xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).",
+ __func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag));
ASSERT(0);
return -EINVAL;
}
agino = XFS_INO_TO_AGINO(mp, inode);
- if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
- xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+ if (inode != xfs_agino_to_ino(pag, agino)) {
+ xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).",
__func__, (unsigned long long)inode,
- (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ (unsigned long long)xfs_agino_to_ino(pag, agino));
ASSERT(0);
return -EINVAL;
}
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
- __func__, agbno, mp->m_sb.sb_agblocks);
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
+ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
+ __func__, agbno, xfs_ag_block_count(mp, pag_agno(pag)));
ASSERT(0);
return -EINVAL;
}
/*
* Get the allocation group header.
*/
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
__func__, error);
@@ -2179,7 +2369,7 @@ xfs_difree(
/*
* Fix up the inode allocation btree.
*/
- error = xfs_difree_inobt(mp, tp, agbp, pag, agino, xic, &rec);
+ error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
if (error)
goto error0;
@@ -2187,7 +2377,7 @@ xfs_difree(
* Fix up the free inode btree.
*/
if (xfs_has_finobt(mp)) {
- error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec);
+ error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
if (error)
goto error0;
}
@@ -2200,26 +2390,26 @@ error0:
STATIC int
xfs_imap_lookup(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
struct xfs_perag *pag,
+ struct xfs_trans *tp,
xfs_agino_t agino,
xfs_agblock_t agbno,
xfs_agblock_t *chunk_agbno,
xfs_agblock_t *offset_agbno,
int flags)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
int error;
int i;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_alert(mp,
"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
- __func__, error, pag->pag_agno);
+ __func__, error, pag_agno(pag));
return error;
}
@@ -2229,7 +2419,7 @@ xfs_imap_lookup(
* we have a record, we need to ensure it contains the inode number
* we are looking up.
*/
- cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
@@ -2263,12 +2453,13 @@ xfs_imap_lookup(
*/
int
xfs_imap(
- struct xfs_mount *mp, /* file system mount structure */
- struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
xfs_ino_t ino, /* inode to locate */
struct xfs_imap *imap, /* location map structure */
uint flags) /* flags for inode btree lookup */
{
+ struct xfs_mount *mp = pag_mount(pag);
xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */
@@ -2276,18 +2467,16 @@ xfs_imap(
int error; /* error code */
int offset; /* index of inode in its buffer */
xfs_agblock_t offset_agbno; /* blks from chunk start to inode */
- struct xfs_perag *pag;
ASSERT(ino != NULLFSINO);
/*
* Split up the inode number into its parts.
*/
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (!pag || agbno >= mp->m_sb.sb_agblocks ||
- ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag)) ||
+ ino != xfs_agino_to_ino(pag, agino)) {
error = -EINVAL;
#ifdef DEBUG
/*
@@ -2295,28 +2484,23 @@ xfs_imap(
* as they can be invalid without implying corruption.
*/
if (flags & XFS_IGET_UNTRUSTED)
- goto out_drop;
- if (!pag) {
- xfs_alert(mp,
- "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
- __func__, XFS_INO_TO_AGNO(mp, ino),
- mp->m_sb.sb_agcount);
- }
- if (agbno >= mp->m_sb.sb_agblocks) {
+ return error;
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
xfs_alert(mp,
"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
__func__, (unsigned long long)agbno,
- (unsigned long)mp->m_sb.sb_agblocks);
+ (unsigned long)xfs_ag_block_count(mp,
+ pag_agno(pag)));
}
- if (pag && ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ if (ino != xfs_agino_to_ino(pag, agino)) {
xfs_alert(mp,
- "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+ "%s: ino (0x%llx) != xfs_agino_to_ino() (0x%llx)",
__func__, ino,
- XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ xfs_agino_to_ino(pag, agino));
}
xfs_stack_trace();
#endif /* DEBUG */
- goto out_drop;
+ return error;
}
/*
@@ -2327,10 +2511,10 @@ xfs_imap(
* in all cases where an untrusted inode number is passed.
*/
if (flags & XFS_IGET_UNTRUSTED) {
- error = xfs_imap_lookup(mp, tp, pag, agino, agbno,
+ error = xfs_imap_lookup(pag, tp, agino, agbno,
&chunk_agbno, &offset_agbno, flags);
if (error)
- goto out_drop;
+ return error;
goto out_map;
}
@@ -2342,12 +2526,11 @@ xfs_imap(
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, agbno);
imap->im_len = XFS_FSB_TO_BB(mp, 1);
imap->im_boffset = (unsigned short)(offset <<
mp->m_sb.sb_inodelog);
- error = 0;
- goto out_drop;
+ return 0;
}
/*
@@ -2359,10 +2542,10 @@ xfs_imap(
offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
- error = xfs_imap_lookup(mp, tp, pag, agino, agbno,
+ error = xfs_imap_lookup(pag, tp, agino, agbno,
&chunk_agbno, &offset_agbno, flags);
if (error)
- goto out_drop;
+ return error;
}
out_map:
@@ -2373,7 +2556,7 @@ out_map:
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, cluster_agbno);
imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
@@ -2390,14 +2573,9 @@ out_map:
__func__, (unsigned long long) imap->im_blkno,
(unsigned long long) imap->im_len,
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
- error = -EINVAL;
- goto out_drop;
+ return -EINVAL;
}
- error = 0;
-out_drop:
- if (pag)
- xfs_perag_put(pag);
- return error;
+ return 0;
}
/*
@@ -2469,11 +2647,14 @@ xfs_ialloc_log_agi(
static xfs_failaddr_t
xfs_agi_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_agi *agi = bp->b_addr;
- int i;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_agi *agi = bp->b_addr;
+ xfs_failaddr_t fa;
+ uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno);
+ uint32_t agi_length = be32_to_cpu(agi->agi_length);
+ int i;
if (xfs_has_crc(mp)) {
if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
@@ -2490,6 +2671,10 @@ xfs_agi_verify(
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
return __this_address;
+ fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
+ if (fa)
+ return fa;
+
if (be32_to_cpu(agi->agi_level) < 1 ||
be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
return __this_address;
@@ -2499,15 +2684,6 @@ xfs_agi_verify(
be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
return __this_address;
- /*
- * during growfs operations, the perag is not fully initialised,
- * so we can't use it for any useful checking. growfs ensures we can't
- * use it by using uncached buffers that don't have the perag attached
- * so we can detect and avoid this problem.
- */
- if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
- return __this_address;
-
for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
continue;
@@ -2530,7 +2706,7 @@ xfs_agi_read_verify(
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_agi_verify(bp);
- if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
+ if (fa || XFS_TEST_ERROR(mp, XFS_ERRTAG_IALLOC_READ_AGI))
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
}
@@ -2573,16 +2749,19 @@ int
xfs_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ xfs_buf_flags_t flags,
struct xfs_buf **agibpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
- trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_read_agi(pag);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGI_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
if (error)
return error;
if (tp)
@@ -2600,31 +2779,57 @@ int
xfs_ialloc_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ int flags,
struct xfs_buf **agibpp)
{
struct xfs_buf *agibp;
struct xfs_agi *agi;
int error;
- trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_ialloc_read_agi(pag);
- error = xfs_read_agi(pag, tp, &agibp);
+ error = xfs_read_agi(pag, tp,
+ (flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+ &agibp);
if (error)
return error;
agi = agibp->b_addr;
- if (!pag->pagi_init) {
+ if (!xfs_perag_initialised_agi(pag)) {
pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
pag->pagi_count = be32_to_cpu(agi->agi_count);
- pag->pagi_init = 1;
+ set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
}
+#ifdef DEBUG
/*
- * It's possible for these to be out of sync if
- * we are in the middle of a forced shutdown.
+ * It's possible for the AGF to be out of sync if the block device is
+ * silently dropping writes. This can happen in fstests with dmflakey
+ * enabled, which allows the buffer to be cleaned and reclaimed by
+ * memory pressure and then re-read from disk here. We will get a
+ * stale version of the AGF from disk, and nothing good can happen from
+ * here. Hence if we detect this situation, immediately shut down the
+ * filesystem.
+ *
+ * This can also happen if we are already in the middle of a forced
+ * shutdown, so don't bother checking if we are already shut down.
*/
- ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag->pag_mount));
+ if (!xfs_is_shutdown(pag_mount(pag))) {
+ bool ok = true;
+
+ ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
+ ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
+
+ if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ xfs_trans_brelse(tp, agibp);
+ xfs_force_shutdown(pag_mount(pag),
+ SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+ }
+ }
+#endif /* DEBUG */
+
if (agibpp)
*agibpp = agibp;
else
@@ -2632,44 +2837,50 @@ xfs_ialloc_read_agi(
return 0;
}
-/* Is there an inode record covering a given range of inode numbers? */
-int
-xfs_ialloc_has_inode_record(
- struct xfs_btree_cur *cur,
- xfs_agino_t low,
- xfs_agino_t high,
- bool *exists)
+/* How many inodes are backed by inode clusters ondisk? */
+STATIC int
+xfs_ialloc_count_ondisk(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t low,
+ xfs_agino_t high,
+ unsigned int *allocated)
{
struct xfs_inobt_rec_incore irec;
- xfs_agino_t agino;
- uint16_t holemask;
- int has_record;
- int i;
- int error;
+ unsigned int ret = 0;
+ int has_record;
+ int error;
- *exists = false;
error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
- while (error == 0 && has_record) {
+ if (error)
+ return error;
+
+ while (has_record) {
+ unsigned int i, hole_idx;
+
error = xfs_inobt_get_rec(cur, &irec, &has_record);
- if (error || irec.ir_startino > high)
+ if (error)
+ return error;
+ if (irec.ir_startino > high)
break;
- agino = irec.ir_startino;
- holemask = irec.ir_holemask;
- for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
- i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
- if (holemask & 1)
+ for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+ if (irec.ir_startino + i < low)
continue;
- if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
- agino <= high) {
- *exists = true;
- return 0;
- }
+ if (irec.ir_startino + i > high)
+ break;
+
+ hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT;
+ if (!(irec.ir_holemask & (1U << hole_idx)))
+ ret++;
}
error = xfs_btree_increment(cur, 0, &has_record);
+ if (error)
+ return error;
}
- return error;
+
+ *allocated = ret;
+ return 0;
}
/* Is there an inode record covering a given extent? */
@@ -2678,15 +2889,27 @@ xfs_ialloc_has_inodes_at_extent(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
- xfs_agino_t low;
- xfs_agino_t high;
+ xfs_agino_t agino;
+ xfs_agino_t last_agino;
+ unsigned int allocated;
+ int error;
+
+ agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
+ last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
- low = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
- high = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
+ error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated);
+ if (error)
+ return error;
- return xfs_ialloc_has_inode_record(cur, low, high, exists);
+ if (allocated == 0)
+ *outcome = XBTREE_RECPACKING_EMPTY;
+ else if (allocated == last_agino - agino + 1)
+ *outcome = XBTREE_RECPACKING_FULL;
+ else
+ *outcome = XBTREE_RECPACKING_SPARSE;
+ return 0;
}
struct xfs_ialloc_count_inodes {
@@ -2703,8 +2926,13 @@ xfs_ialloc_count_inodes_rec(
{
struct xfs_inobt_rec_incore irec;
struct xfs_ialloc_count_inodes *ci = priv;
+ xfs_failaddr_t fa;
xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), &irec);
+ if (fa)
+ return xfs_inobt_complain_bad_rec(cur, fa, &irec);
+
ci->count += irec.ir_count;
ci->freecount += irec.ir_freecount;
@@ -2721,7 +2949,7 @@ xfs_ialloc_count_inodes(
struct xfs_ialloc_count_inodes ci = {0};
int error;
- ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
+ ASSERT(xfs_btree_is_ino(cur->bc_ops));
error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
if (error)
return error;
@@ -2762,8 +2990,8 @@ xfs_ialloc_setup_geometry(
/* Compute inode btree geometry. */
igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
- igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
- igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+ igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, true);
+ igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, false);
igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
@@ -2848,6 +3076,11 @@ xfs_ialloc_setup_geometry(
igeo->ialloc_align = mp->m_dalign;
else
igeo->ialloc_align = 0;
+
+ if (mp->m_sb.sb_blocksize > PAGE_SIZE)
+ igeo->min_folio_order = mp->m_sb.sb_blocklog - PAGE_SHIFT;
+ else
+ igeo->min_folio_order = 0;
}
/* Compute the location of the root directory inode that is laid out by mkfs. */
@@ -2924,26 +3157,24 @@ xfs_ialloc_calc_rootino(
*/
int
xfs_ialloc_check_shrink(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
- xfs_agnumber_t agno,
struct xfs_buf *agibp,
xfs_agblock_t new_length)
{
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
- xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length);
+ xfs_agino_t agino;
int has;
int error;
- if (!xfs_has_sparseinodes(mp))
+ if (!xfs_has_sparseinodes(pag_mount(pag)))
return 0;
- pag = xfs_perag_get(mp, agno);
- cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agibp);
/* Look up the inobt record that would correspond to the new EOFS. */
+ agino = XFS_AGB_TO_AGINO(pag_mount(pag), new_length);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
if (error || !has)
goto out;
@@ -2953,6 +3184,7 @@ xfs_ialloc_check_shrink(
goto out;
if (!has) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_INOBT);
error = -EFSCORRUPTED;
goto out;
}
@@ -2964,6 +3196,5 @@ xfs_ialloc_check_shrink(
}
out:
xfs_btree_del_cursor(cur, error);
- xfs_perag_put(pag);
return error;
}