diff options
Diffstat (limited to 'fs/xfs/libxfs/xfs_ialloc.c')
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 493 |
1 files changed, 330 insertions, 163 deletions
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 2361a22035b0..f3a840a425f5 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -27,6 +27,7 @@ #include "xfs_log.h" #include "xfs_rmap.h" #include "xfs_ag.h" +#include "xfs_health.h" /* * Lookup a record by ino in the btree given by cur. @@ -140,13 +141,13 @@ xfs_inobt_complain_bad_rec( struct xfs_mount *mp = cur->bc_mp; xfs_warn(mp, - "%s Inode BTree record corruption in AG %d detected at %pS!", - cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", - cur->bc_ag.pag->pag_agno, fa); + "%sbt record corruption in AG %d detected at %pS!", + cur->bc_ops->name, cur->bc_group->xg_gno, fa); xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", irec->ir_startino, irec->ir_count, irec->ir_freecount, irec->ir_free, irec->ir_holemask); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -169,7 +170,7 @@ xfs_inobt_get_rec( return error; xfs_inobt_btrec_to_irec(mp, rec, irec); - fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec); + fa = xfs_inobt_check_irec(to_perag(cur->bc_group), irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, irec); @@ -205,14 +206,17 @@ xfs_inobt_insert( struct xfs_buf *agbp, xfs_agino_t newino, xfs_agino_t newlen, - xfs_btnum_t btnum) + bool is_finobt) { struct xfs_btree_cur *cur; xfs_agino_t thisino; int i; int error; - cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); + if (is_finobt) + cur = xfs_finobt_init_cursor(pag, tp, agbp); + else + cur = xfs_inobt_init_cursor(pag, tp, agbp); for (thisino = newino; thisino < newino + newlen; @@ -271,8 +275,10 @@ xfs_check_agi_freecount( } } while (i == 1); - if (!xfs_is_shutdown(cur->bc_mp)) - ASSERT(freecount == cur->bc_ag.pag->pagi_freecount); + if (!xfs_is_shutdown(cur->bc_mp)) { + ASSERT(freecount == + to_perag(cur->bc_group)->pagi_freecount); + } } return 0; } @@ -528,16 +534,14 @@ __xfs_inobt_rec_merge( } /* - * Insert a new sparse inode chunk into the associated inode btree. The inode - * record for the sparse chunk is pre-aligned to a startino that should match - * any pre-existing sparse inode record in the tree. This allows sparse chunks - * to fill over time. + * Insert a new sparse inode chunk into the associated inode allocation btree. + * The inode record for the sparse chunk is pre-aligned to a startino that + * should match any pre-existing sparse inode record in the tree. This allows + * sparse chunks to fill over time. * - * This function supports two modes of handling preexisting records depending on - * the merge flag. If merge is true, the provided record is merged with the + * If no preexisting record exists, the provided record is inserted. + * If there is a preexisting record, the provided record is merged with the * existing record and updated in place. The merged record is returned in nrec. - * If merge is false, an existing record is replaced with the provided record. - * If no preexisting record exists, the provided record is always inserted. * * It is considered corruption if a merge is requested and not possible. Given * the sparse inode alignment constraints, this should never happen. @@ -547,17 +551,15 @@ xfs_inobt_insert_sprec( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - int btnum, - struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ - bool merge) /* merge or replace */ + struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; int error; int i; struct xfs_inobt_rec_incore rec; - cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); + cur = xfs_inobt_init_cursor(pag, tp, agbp); /* the new record is pre-aligned so we know where to look */ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); @@ -571,6 +573,7 @@ xfs_inobt_insert_sprec( if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -579,45 +582,42 @@ xfs_inobt_insert_sprec( } /* - * A record exists at this startino. Merge or replace the record - * depending on what we've been asked to do. + * A record exists at this startino. Merge the records. */ - if (merge) { - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - goto error; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto error; - } - if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) { - error = -EFSCORRUPTED; - goto error; - } + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } + if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } - /* - * This should never fail. If we have coexisting records that - * cannot merge, something is seriously wrong. - */ - if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) { - error = -EFSCORRUPTED; - goto error; - } + /* + * This should never fail. If we have coexisting records that + * cannot merge, something is seriously wrong. + */ + if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } - trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino, - rec.ir_holemask, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_pre(pag, &rec, nrec); - /* merge to nrec to output the updated record */ - __xfs_inobt_rec_merge(nrec, &rec); + /* merge to nrec to output the updated record */ + __xfs_inobt_rec_merge(nrec, &rec); - trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_post(pag, nrec); - error = xfs_inobt_rec_check_count(mp, nrec); - if (error) - goto error; - } + error = xfs_inobt_rec_check_count(mp, nrec); + if (error) + goto error; error = xfs_inobt_update(cur, nrec); if (error) @@ -632,6 +632,59 @@ error: } /* + * Insert a new sparse inode chunk into the free inode btree. The inode + * record for the sparse chunk is pre-aligned to a startino that should match + * any pre-existing sparse inode record in the tree. This allows sparse chunks + * to fill over time. + * + * The new record is always inserted, overwriting a pre-existing record if + * there is one. + */ +STATIC int +xfs_finobt_insert_sprec( + struct xfs_perag *pag, + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */ +{ + struct xfs_mount *mp = pag_mount(pag); + struct xfs_btree_cur *cur; + int error; + int i; + + cur = xfs_finobt_init_cursor(pag, tp, agbp); + + /* the new record is pre-aligned so we know where to look */ + error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + /* if nothing there, insert a new record and return */ + if (i == 0) { + error = xfs_inobt_insert_rec(cur, nrec->ir_holemask, + nrec->ir_count, nrec->ir_freecount, + nrec->ir_free, &i); + if (error) + goto error; + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } + } else { + error = xfs_inobt_update(cur, nrec); + if (error) + goto error; + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + + +/* * Allocate new inodes in the allocation group specified by agbp. Returns 0 if * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so * the caller knows it can try another AG, a hard -ENOSPC when over the maximum @@ -714,8 +767,7 @@ xfs_ialloc_ag_alloc( /* Allow space for the inode btree to split. */ args.minleft = igeo->inobt_maxlevels; error = xfs_alloc_vextent_exact_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - args.agbno)); + xfs_agbno_to_fsb(pag, args.agbno)); if (error) return error; @@ -757,8 +809,8 @@ xfs_ialloc_ag_alloc( */ args.minleft = igeo->inobt_maxlevels; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; } @@ -770,8 +822,8 @@ xfs_ialloc_ag_alloc( if (isaligned && args.fsbno == NULLFSBLOCK) { args.alignment = igeo->cluster_align; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; } @@ -801,13 +853,14 @@ sparse_alloc: * the end of the AG. */ args.min_agbno = args.mp->m_sb.sb_inoalignmt; - args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, + args.max_agbno = round_down(xfs_ag_block_count(args.mp, + pag_agno(pag)), args.mp->m_sb.sb_inoalignmt) - igeo->ialloc_blks; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; @@ -830,7 +883,7 @@ sparse_alloc: * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno, + error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag), args.agbno, args.len, get_random_u32()); if (error) @@ -857,13 +910,11 @@ sparse_alloc: * if necessary. If a merge does occur, rec is updated to the * merged record. */ - error = xfs_inobt_insert_sprec(pag, tp, agbp, - XFS_BTNUM_INO, &rec, true); + error = xfs_inobt_insert_sprec(pag, tp, agbp, &rec); if (error == -EFSCORRUPTED) { xfs_alert(args.mp, "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u", - XFS_AGINO_TO_INO(args.mp, pag->pag_agno, - rec.ir_startino), + xfs_agino_to_ino(pag, rec.ir_startino), rec.ir_holemask, rec.ir_count); xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE); } @@ -882,21 +933,19 @@ sparse_alloc: * existing record with this one. */ if (xfs_has_finobt(args.mp)) { - error = xfs_inobt_insert_sprec(pag, tp, agbp, - XFS_BTNUM_FINO, &rec, false); + error = xfs_finobt_insert_sprec(pag, tp, agbp, &rec); if (error) return error; } } else { /* full chunk - insert new records to both btrees */ - error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, - XFS_BTNUM_INO); + error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, false); if (error) return error; if (xfs_has_finobt(args.mp)) { error = xfs_inobt_insert(pag, tp, agbp, newino, - newlen, XFS_BTNUM_FINO); + newlen, true); if (error) return error; } @@ -949,8 +998,10 @@ xfs_ialloc_next_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } return 0; @@ -974,8 +1025,10 @@ xfs_ialloc_get_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } return 0; @@ -1003,6 +1056,33 @@ xfs_inobt_first_free_inode( } /* + * If this AG has corrupt inodes, check if allocating this inode would fail + * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again + * somewhere else. + */ +static int +xfs_dialloc_check_ino( + struct xfs_perag *pag, + struct xfs_trans *tp, + xfs_ino_t ino) +{ + struct xfs_imap imap; + struct xfs_buf *bp; + int error; + + error = xfs_imap(pag, tp, ino, &imap, 0); + if (error) + return -EAGAIN; + + error = xfs_imap_to_bp(pag_mount(pag), tp, &imap, &bp); + if (error) + return -EAGAIN; + + xfs_trans_brelse(tp, bp); + return 0; +} + +/* * Allocate an inode using the inobt-only algorithm. */ STATIC int @@ -1030,7 +1110,7 @@ xfs_dialloc_ag_inobt( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1045,7 +1125,7 @@ xfs_dialloc_ag_inobt( /* * If in the same AG as the parent, try to get near the parent. */ - if (pagno == pag->pag_agno) { + if (pagno == pag_agno(pag)) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */ @@ -1053,6 +1133,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1061,6 +1142,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, j != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1219,6 +1301,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1228,6 +1311,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1237,6 +1321,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1248,7 +1333,14 @@ alloc_inode: ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + ino = xfs_agino_to_ino(pag, rec.ir_startino + offset); + + if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { + error = xfs_dialloc_check_ino(pag, tp, ino); + if (error) + goto error0; + } + rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; error = xfs_inobt_update(cur, &rec); @@ -1297,8 +1389,10 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(lcur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(lcur); return -EFSCORRUPTED; + } /* * See if we've landed in the parent inode record. The finobt @@ -1322,12 +1416,14 @@ xfs_dialloc_ag_finobt_near( if (error) goto error_rcur; if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) { + xfs_btree_mark_sick(lcur); error = -EFSCORRUPTED; goto error_rcur; } } if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) { + xfs_btree_mark_sick(lcur); error = -EFSCORRUPTED; goto error_rcur; } @@ -1383,8 +1479,10 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return 0; } } @@ -1395,14 +1493,18 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return 0; } @@ -1424,14 +1526,18 @@ xfs_dialloc_ag_update_inobt( error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); @@ -1440,8 +1546,10 @@ xfs_dialloc_ag_update_inobt( if (XFS_IS_CORRUPT(cur->bc_mp, rec.ir_free != frec->ir_free || - rec.ir_freecount != frec->ir_freecount)) + rec.ir_freecount != frec->ir_freecount)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return xfs_inobt_update(cur, &rec); } @@ -1483,7 +1591,7 @@ xfs_dialloc_ag( if (!pagino) pagino = be32_to_cpu(agi->agi_newino); - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); + cur = xfs_finobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(cur); if (error) @@ -1494,7 +1602,7 @@ xfs_dialloc_ag( * parent. If so, find the closest available inode to the parent. If * not, consider the agi hint or find the first free inode in the AG. */ - if (pag->pag_agno == pagno) + if (pag_agno(pag) == pagno) error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); else error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); @@ -1506,7 +1614,13 @@ xfs_dialloc_ag( ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + ino = xfs_agino_to_ino(pag, rec.ir_startino + offset); + + if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { + error = xfs_dialloc_check_ino(pag, tp, ino); + if (error) + goto error_cur; + } /* * Modify or remove the finobt record. @@ -1526,7 +1640,7 @@ xfs_dialloc_ag( * the original freecount. If all is well, make the equivalent update to * the inobt using the finobt record and offset information. */ - icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + icur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(icur); if (error) @@ -1623,7 +1737,7 @@ xfs_dialloc_good_ag( return false; if (!xfs_perag_initialised_agi(pag)) { - error = xfs_ialloc_read_agi(pag, tp, NULL); + error = xfs_ialloc_read_agi(pag, tp, 0, NULL); if (error) return false; } @@ -1692,7 +1806,7 @@ xfs_dialloc_try_ag( * Then read in the AGI buffer and recheck with the AGI buffer * lock held. */ - error = xfs_ialloc_read_agi(pag, *tpp, &agbp); + error = xfs_ialloc_read_agi(pag, *tpp, 0, &agbp); if (error) return error; @@ -1729,6 +1843,40 @@ out_release: } /* + * Pick an AG for the new inode. + * + * Directories, symlinks, and regular files frequently allocate at least one + * block, so factor that potential expansion when we examine whether an AG has + * enough space for file creation. Try to keep metadata files all in the same + * AG. + */ +static inline xfs_agnumber_t +xfs_dialloc_pick_ag( + struct xfs_mount *mp, + struct xfs_inode *dp, + umode_t mode) +{ + xfs_agnumber_t start_agno; + + if (!dp) + return 0; + if (xfs_is_metadir_inode(dp)) { + if (mp->m_sb.sb_logstart) + return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); + return 0; + } + + if (S_ISDIR(mode)) + return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi; + + start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + + return start_agno; +} + +/* * Allocate an on-disk inode. * * Mode is used to tell whether the new inode is a directory and hence where to @@ -1739,34 +1887,23 @@ out_release: int xfs_dialloc( struct xfs_trans **tpp, - xfs_ino_t parent, - umode_t mode, + const struct xfs_icreate_args *args, xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; - xfs_agnumber_t agno; - int error = 0; - xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_ino_t ino = NULLFSINO; + xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; + xfs_agnumber_t agno; + xfs_agnumber_t start_agno; + umode_t mode = args->mode & S_IFMT; bool ok_alloc = true; bool low_space = false; int flags; - xfs_ino_t ino = NULLFSINO; + int error = 0; - /* - * Directories, symlinks, and regular files frequently allocate at least - * one block, so factor that potential expansion when we examine whether - * an AG has enough space for file creation. - */ - if (S_ISDIR(mode)) - start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) % - mp->m_maxagi; - else { - start_agno = XFS_INO_TO_AGNO(mp, parent); - if (start_agno >= mp->m_maxagi) - start_agno = 0; - } + start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode); /* * If we have already hit the ceiling of inode blocks then clear @@ -1830,6 +1967,21 @@ retry: } return -ENOSPC; } + + /* + * Protect against obviously corrupt allocation btree records. Later + * xfs_iget checks will catch re-allocation of other active in-memory + * and on-disk inodes. If we don't catch reallocating the parent inode + * here we will deadlock in xfs_iget() so we have to do these checks + * first. + */ + if (ino == parent || !xfs_verify_dir_ino(mp, ino)) { + xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); + xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), + XFS_SICK_AG_INOBT); + return -EFSCORRUPTED; + } + *new_ino = ino; return 0; } @@ -1842,7 +1994,7 @@ retry: static int xfs_difree_inode_chunk( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, struct xfs_inobt_rec_incore *rec) { struct xfs_mount *mp = tp->t_mountp; @@ -1856,10 +2008,9 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - return xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, sagbno), + return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1903,10 +2054,9 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, - &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, - false); + error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, agbno), + contigblk, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE, 0); if (error) return error; @@ -1928,7 +2078,7 @@ xfs_difree_inobt( struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_agi *agi = agbp->b_addr; struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; @@ -1943,7 +2093,7 @@ xfs_difree_inobt( /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(cur); if (error) @@ -1958,6 +2108,7 @@ xfs_difree_inobt( goto error0; } if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1968,6 +2119,7 @@ xfs_difree_inobt( goto error0; } if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1991,8 +2143,7 @@ xfs_difree_inobt( if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { xic->deleted = true; - xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, - rec.ir_startino); + xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); /* @@ -2015,7 +2166,7 @@ xfs_difree_inobt( goto error0; } - error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + error = xfs_difree_inode_chunk(tp, pag, &rec); if (error) goto error0; } else { @@ -2061,14 +2212,14 @@ xfs_difree_finobt( xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int offset = agino - ibtrec->ir_startino; int error; int i; - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); + cur = xfs_finobt_init_cursor(pag, tp, agbp); error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) @@ -2080,6 +2231,7 @@ xfs_difree_finobt( * something is out of sync. */ if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2106,6 +2258,7 @@ xfs_difree_finobt( if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2116,6 +2269,7 @@ xfs_difree_finobt( if (XFS_IS_CORRUPT(mp, rec.ir_free != ibtrec->ir_free || rec.ir_freecount != ibtrec->ir_freecount)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2181,31 +2335,31 @@ xfs_difree( /* * Break up inode number into its components. */ - if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) { - xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).", - __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno); + if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) { + xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).", + __func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag)); ASSERT(0); return -EINVAL; } agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + if (inode != xfs_agino_to_ino(pag, agino)) { + xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).", __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); + (unsigned long long)xfs_agino_to_ino(pag, agino)); ASSERT(0); return -EINVAL; } agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks) { - xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", - __func__, agbno, mp->m_sb.sb_agblocks); + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) { + xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).", + __func__, agbno, xfs_ag_block_count(mp, pag_agno(pag))); ASSERT(0); return -EINVAL; } /* * Get the allocation group header. */ - error = xfs_ialloc_read_agi(pag, tp, &agbp); + error = xfs_ialloc_read_agi(pag, tp, 0, &agbp); if (error) { xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", __func__, error); @@ -2244,18 +2398,18 @@ xfs_imap_lookup( xfs_agblock_t *offset_agbno, int flags) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; int error; int i; - error = xfs_ialloc_read_agi(pag, tp, &agbp); + error = xfs_ialloc_read_agi(pag, tp, 0, &agbp); if (error) { xfs_alert(mp, "%s: xfs_ialloc_read_agi() returned error %d, agno %d", - __func__, error, pag->pag_agno); + __func__, error, pag_agno(pag)); return error; } @@ -2265,7 +2419,7 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) @@ -2305,7 +2459,7 @@ xfs_imap( struct xfs_imap *imap, /* location map structure */ uint flags) /* flags for inode btree lookup */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); xfs_agblock_t agbno; /* block number of inode in the alloc group */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ @@ -2321,8 +2475,8 @@ xfs_imap( */ agino = XFS_INO_TO_AGINO(mp, ino); agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agbno >= mp->m_sb.sb_agblocks || - ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag)) || + ino != xfs_agino_to_ino(pag, agino)) { error = -EINVAL; #ifdef DEBUG /* @@ -2331,17 +2485,18 @@ xfs_imap( */ if (flags & XFS_IGET_UNTRUSTED) return error; - if (agbno >= mp->m_sb.sb_agblocks) { + if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) { xfs_alert(mp, "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", __func__, (unsigned long long)agbno, - (unsigned long)mp->m_sb.sb_agblocks); + (unsigned long)xfs_ag_block_count(mp, + pag_agno(pag))); } - if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + if (ino != xfs_agino_to_ino(pag, agino)) { xfs_alert(mp, - "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", + "%s: ino (0x%llx) != xfs_agino_to_ino() (0x%llx)", __func__, ino, - XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); + xfs_agino_to_ino(pag, agino)); } xfs_stack_trace(); #endif /* DEBUG */ @@ -2371,7 +2526,7 @@ xfs_imap( offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno); + imap->im_blkno = xfs_agbno_to_daddr(pag, agbno); imap->im_len = XFS_FSB_TO_BB(mp, 1); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); @@ -2401,7 +2556,7 @@ out_map: offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno); + imap->im_blkno = xfs_agbno_to_daddr(pag, cluster_agbno); imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); @@ -2594,16 +2749,19 @@ int xfs_read_agi( struct xfs_perag *pag, struct xfs_trans *tp, + xfs_buf_flags_t flags, struct xfs_buf **agibpp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); int error; - trace_xfs_read_agi(pag->pag_mount, pag->pag_agno); + trace_xfs_read_agi(pag); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops); + XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGI_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); if (error) return error; if (tp) @@ -2621,15 +2779,18 @@ int xfs_ialloc_read_agi( struct xfs_perag *pag, struct xfs_trans *tp, + int flags, struct xfs_buf **agibpp) { struct xfs_buf *agibp; struct xfs_agi *agi; int error; - trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno); + trace_xfs_ialloc_read_agi(pag); - error = xfs_read_agi(pag, tp, &agibp); + error = xfs_read_agi(pag, tp, + (flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, + &agibp); if (error) return error; @@ -2645,7 +2806,7 @@ xfs_ialloc_read_agi( * we are in the middle of a forced shutdown. */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag->pag_mount)); + xfs_is_shutdown(pag_mount(pag))); if (agibpp) *agibpp = agibp; else @@ -2745,7 +2906,7 @@ xfs_ialloc_count_inodes_rec( xfs_failaddr_t fa; xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); - fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec); + fa = xfs_inobt_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, &irec); @@ -2765,7 +2926,7 @@ xfs_ialloc_count_inodes( struct xfs_ialloc_count_inodes ci = {0}; int error; - ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + ASSERT(xfs_btree_is_ino(cur->bc_ops)); error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); if (error) return error; @@ -2806,8 +2967,8 @@ xfs_ialloc_setup_geometry( /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, true); + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, false); igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; @@ -2892,6 +3053,11 @@ xfs_ialloc_setup_geometry( igeo->ialloc_align = mp->m_dalign; else igeo->ialloc_align = 0; + + if (mp->m_sb.sb_blocksize > PAGE_SIZE) + igeo->min_folio_order = mp->m_sb.sb_blocklog - PAGE_SHIFT; + else + igeo->min_folio_order = 0; } /* Compute the location of the root directory inode that is laid out by mkfs. */ @@ -2979,13 +3145,13 @@ xfs_ialloc_check_shrink( int has; int error; - if (!xfs_has_sparseinodes(pag->pag_mount)) + if (!xfs_has_sparseinodes(pag_mount(pag))) return 0; - cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agibp); /* Look up the inobt record that would correspond to the new EOFS. */ - agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length); + agino = XFS_AGB_TO_AGINO(pag_mount(pag), new_length); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); if (error || !has) goto out; @@ -2995,6 +3161,7 @@ xfs_ialloc_check_shrink( goto out; if (!has) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_INOBT); error = -EFSCORRUPTED; goto out; } |