summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2023-02-13 09:14:53 +1100
committerDave Chinner <dchinner@redhat.com>2023-02-13 09:14:53 +1100
commitecd788a92460eef44c5444290757bfd0f38d91b4 (patch)
tree99f803895e57542df48f3064d8517950fb2b6ea8 /fs/xfs
parent76257a15873ccce817e0c4441f6bb66fb8f8201c (diff)
xfs: rework xfs_alloc_vextent()
It's a multiplexing mess that can be greatly simplified, and really needs to be simplified to allow active per-ag references to propagate from initial AG selection code the the bmapi code. This splits the code out into separate a parameter checking function, an iterator function, and allocation completion functions and then implements the individual policies using these functions. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c464
1 files changed, 285 insertions, 179 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 4508862239a8..5afda109aaef 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3151,29 +3151,20 @@ xfs_alloc_read_agf(
}
/*
- * Allocate an extent (variable-size).
- * Depending on the allocation type, we either look in a single allocation
- * group or loop over the allocation groups to find the result.
+ * Pre-proces allocation arguments to set initial state that we don't require
+ * callers to set up correctly, as well as bounds check the allocation args
+ * that are set up.
*/
-int /* error */
-xfs_alloc_vextent(
- struct xfs_alloc_arg *args) /* allocation argument structure */
+static int
+xfs_alloc_vextent_check_args(
+ struct xfs_alloc_arg *args)
{
- xfs_agblock_t agsize; /* allocation group size */
- int error;
- int flags; /* XFS_ALLOC_FLAG_... locking flags */
- struct xfs_mount *mp; /* mount structure pointer */
- xfs_agnumber_t sagno; /* starting allocation group number */
- xfs_alloctype_t type; /* input allocation type */
- int bump_rotor = 0;
- xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
- xfs_agnumber_t minimum_agno = 0;
+ struct xfs_mount *mp = args->mp;
+ xfs_agblock_t agsize;
- mp = args->mp;
- type = args->otype = args->type;
+ args->otype = args->type;
args->agbno = NULLAGBLOCK;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+
/*
* Just fix this up, for the case where the last a.g. is shorter
* (or there's only one a.g.) and the caller couldn't easily figure
@@ -3195,199 +3186,314 @@ xfs_alloc_vextent(
args->mod >= args->prod) {
args->fsbno = NULLFSBLOCK;
trace_xfs_alloc_vextent_badargs(args);
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+/*
+ * Post-process allocation results to set the allocated block number correctly
+ * for the caller.
+ *
+ * XXX: xfs_alloc_vextent() should really be returning ENOSPC for ENOSPC, not
+ * hiding it behind a "successful" NULLFSBLOCK allocation.
+ */
+static void
+xfs_alloc_vextent_set_fsbno(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno)
+{
+ struct xfs_mount *mp = args->mp;
+
+ /*
+ * We can end up here with a locked AGF. If we failed, the caller is
+ * likely going to try to allocate again with different parameters, and
+ * that can widen the AGs that are searched for free space. If we have
+ * to do BMBT block allocation, we have to do a new allocation.
+ *
+ * Hence leaving this function with the AGF locked opens up potential
+ * ABBA AGF deadlocks because a future allocation attempt in this
+ * transaction may attempt to lock a lower number AGF.
+ *
+ * We can't release the AGF until the transaction is commited, so at
+ * this point we must update the "first allocation" tracker to point at
+ * this AG if the tracker is empty or points to a lower AG. This allows
+ * the next allocation attempt to be modified appropriately to avoid
+ * deadlocks.
+ */
+ if (args->agbp &&
+ (args->tp->t_highest_agno == NULLAGNUMBER ||
+ args->agno > minimum_agno))
+ args->tp->t_highest_agno = args->agno;
+
+ /* Allocation failed with ENOSPC if NULLAGBLOCK was returned. */
+ if (args->agbno == NULLAGBLOCK) {
+ args->fsbno = NULLFSBLOCK;
+ return;
+ }
+
+ args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+#ifdef DEBUG
+ ASSERT(args->len >= args->minlen);
+ ASSERT(args->len <= args->maxlen);
+ ASSERT(args->agbno % args->alignment == 0);
+ XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
+#endif
+}
+
+/*
+ * Allocate within a single AG only.
+ */
+static int
+xfs_alloc_vextent_this_ag(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno)
+{
+ struct xfs_mount *mp = args->mp;
+ int error;
+
+ error = xfs_alloc_vextent_check_args(args);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+ if (minimum_agno > args->agno) {
+ trace_xfs_alloc_vextent_skip_deadlock(args);
+ args->fsbno = NULLFSBLOCK;
return 0;
}
- switch (type) {
- case XFS_ALLOCTYPE_THIS_AG:
- case XFS_ALLOCTYPE_NEAR_BNO:
- case XFS_ALLOCTYPE_THIS_BNO:
- /*
- * These three force us into a single a.g.
- */
- args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- args->pag = xfs_perag_get(mp, args->agno);
+ args->pag = xfs_perag_get(mp, args->agno);
+ error = xfs_alloc_fix_freelist(args, 0);
+ if (error) {
+ trace_xfs_alloc_vextent_nofix(args);
+ goto out_error;
+ }
+ if (!args->agbp) {
+ trace_xfs_alloc_vextent_noagbp(args);
+ args->fsbno = NULLFSBLOCK;
+ goto out_error;
+ }
+ args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+ error = xfs_alloc_ag_vextent(args);
- if (minimum_agno > args->agno) {
- trace_xfs_alloc_vextent_skip_deadlock(args);
- error = 0;
- break;
- }
+ xfs_alloc_vextent_set_fsbno(args, minimum_agno);
+out_error:
+ xfs_perag_put(args->pag);
+ return error;
+}
+
+/*
+ * Iterate all AGs trying to allocate an extent starting from @start_ag.
+ *
+ * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
+ * allocation attempts in @start_agno have locality information. If we fail to
+ * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
+ * we attempt to allocation in as there is no locality optimisation possible for
+ * those allocations.
+ *
+ * When we wrap the AG iteration at the end of the filesystem, we have to be
+ * careful not to wrap into AGs below ones we already have locked in the
+ * transaction if we are doing a blocking iteration. This will result in an
+ * out-of-order locking of AGFs and hence can cause deadlocks.
+ */
+static int
+xfs_alloc_vextent_iterate_ags(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno,
+ xfs_agnumber_t start_agno,
+ uint32_t flags)
+{
+ struct xfs_mount *mp = args->mp;
+ int error = 0;
- error = xfs_alloc_fix_freelist(args, 0);
+ ASSERT(start_agno >= minimum_agno);
+
+ /*
+ * Loop over allocation groups twice; first time with
+ * trylock set, second time without.
+ */
+ args->agno = start_agno;
+ for (;;) {
+ args->pag = xfs_perag_get(mp, args->agno);
+ error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
- goto error0;
- }
- if (!args->agbp) {
- trace_xfs_alloc_vextent_noagbp(args);
break;
}
- args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
- if ((error = xfs_alloc_ag_vextent(args)))
- goto error0;
- break;
- case XFS_ALLOCTYPE_START_BNO:
/*
- * Try near allocation first, then anywhere-in-ag after
- * the first a.g. fails.
+ * If we get a buffer back then the allocation will fly.
*/
- if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
- xfs_is_inode32(mp)) {
- args->fsbno = XFS_AGB_TO_FSB(mp,
- ((mp->m_agfrotor / rotorstep) %
- mp->m_sb.sb_agcount), 0);
- bump_rotor = 1;
+ if (args->agbp) {
+ error = xfs_alloc_ag_vextent(args);
+ break;
}
- args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- fallthrough;
- case XFS_ALLOCTYPE_FIRST_AG:
+
+ trace_xfs_alloc_vextent_loopfailed(args);
+
/*
- * Rotate through the allocation groups looking for a winner.
- * If we are blocking, we must obey minimum_agno contraints for
- * avoiding ABBA deadlocks on AGF locking.
+ * Didn't work, figure out the next iteration.
*/
- if (type == XFS_ALLOCTYPE_FIRST_AG) {
- /*
- * Start with allocation group given by bno.
- */
- args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+ if (args->agno == start_agno &&
+ args->otype == XFS_ALLOCTYPE_START_BNO)
args->type = XFS_ALLOCTYPE_THIS_AG;
- sagno = minimum_agno;
- flags = 0;
- } else {
- /*
- * Start with the given allocation group.
- */
- args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- flags = XFS_ALLOC_FLAG_TRYLOCK;
+
+ /*
+ * If we are try-locking, we can't deadlock on AGF locks so we
+ * can wrap all the way back to the first AG. Otherwise, wrap
+ * back to the start AG so we can't deadlock and let the end of
+ * scan handler decide what to do next.
+ */
+ if (++(args->agno) == mp->m_sb.sb_agcount) {
+ if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+ args->agno = 0;
+ else
+ args->agno = minimum_agno;
}
/*
- * Loop over allocation groups twice; first time with
- * trylock set, second time without.
+ * Reached the starting a.g., must either be done
+ * or switch to non-trylock mode.
*/
- for (;;) {
- args->pag = xfs_perag_get(mp, args->agno);
- error = xfs_alloc_fix_freelist(args, flags);
- if (error) {
- trace_xfs_alloc_vextent_nofix(args);
- goto error0;
- }
- /*
- * If we get a buffer back then the allocation will fly.
- */
- if (args->agbp) {
- if ((error = xfs_alloc_ag_vextent(args)))
- goto error0;
+ if (args->agno == start_agno) {
+ if (flags == 0) {
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_vextent_allfailed(args);
break;
}
- trace_xfs_alloc_vextent_loopfailed(args);
+ flags = 0;
+ if (args->otype == XFS_ALLOCTYPE_START_BNO) {
+ args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+ args->type = XFS_ALLOCTYPE_NEAR_BNO;
+ }
+ }
+ xfs_perag_put(args->pag);
+ args->pag = NULL;
+ }
+ if (args->pag) {
+ xfs_perag_put(args->pag);
+ args->pag = NULL;
+ }
+ return error;
+}
- /*
- * Didn't work, figure out the next iteration.
- */
- if (args->agno == sagno &&
- type == XFS_ALLOCTYPE_START_BNO)
- args->type = XFS_ALLOCTYPE_THIS_AG;
+/*
+ * Iterate from the AGs from the start AG to the end of the filesystem, trying
+ * to allocate blocks. It starts with a near allocation attempt in the initial
+ * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
+ * back to zero if allowed by previous allocations in this transaction,
+ * otherwise will wrap back to the start AG and run a second blocking pass to
+ * the end of the filesystem.
+ */
+static int
+xfs_alloc_vextent_start_ag(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t start_agno;
+ xfs_agnumber_t rotorstep = xfs_rotorstep;
+ bool bump_rotor = false;
+ int error;
- /*
- * If we are try-locking, we can't deadlock on AGF
- * locks, so we can wrap all the way back to the first
- * AG. Otherwise, wrap back to the start AG so we can't
- * deadlock, and let the end of scan handler decide what
- * to do next.
- */
- if (++(args->agno) == mp->m_sb.sb_agcount) {
- if (flags & XFS_ALLOC_FLAG_TRYLOCK)
- args->agno = 0;
- else
- args->agno = sagno;
- }
+ error = xfs_alloc_vextent_check_args(args);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
- /*
- * Reached the starting a.g., must either be done
- * or switch to non-trylock mode.
- */
- if (args->agno == sagno) {
- if (flags == 0) {
- args->agbno = NULLAGBLOCK;
- trace_xfs_alloc_vextent_allfailed(args);
- break;
- }
+ if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
+ xfs_is_inode32(mp)) {
+ args->fsbno = XFS_AGB_TO_FSB(mp,
+ ((mp->m_agfrotor / rotorstep) %
+ mp->m_sb.sb_agcount), 0);
+ bump_rotor = 1;
+ }
+ start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
+ args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+ args->type = XFS_ALLOCTYPE_NEAR_BNO;
- /*
- * Blocking pass next, so we must obey minimum
- * agno constraints to avoid ABBA AGF deadlocks.
- */
- flags = 0;
- if (minimum_agno > sagno)
- sagno = minimum_agno;
-
- if (type == XFS_ALLOCTYPE_START_BNO) {
- args->agbno = XFS_FSB_TO_AGBNO(mp,
- args->fsbno);
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- }
- }
- xfs_perag_put(args->pag);
- }
- if (bump_rotor) {
- if (args->agno == sagno)
- mp->m_agfrotor = (mp->m_agfrotor + 1) %
- (mp->m_sb.sb_agcount * rotorstep);
- else
- mp->m_agfrotor = (args->agno * rotorstep + 1) %
- (mp->m_sb.sb_agcount * rotorstep);
- }
- break;
- default:
- ASSERT(0);
- /* NOTREACHED */
+ error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+ XFS_ALLOC_FLAG_TRYLOCK);
+ if (bump_rotor) {
+ if (args->agno == start_agno)
+ mp->m_agfrotor = (mp->m_agfrotor + 1) %
+ (mp->m_sb.sb_agcount * rotorstep);
+ else
+ mp->m_agfrotor = (args->agno * rotorstep + 1) %
+ (mp->m_sb.sb_agcount * rotorstep);
}
- if (args->agbno == NULLAGBLOCK) {
- args->fsbno = NULLFSBLOCK;
- } else {
- args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
-#ifdef DEBUG
- ASSERT(args->len >= args->minlen);
- ASSERT(args->len <= args->maxlen);
- ASSERT(args->agbno % args->alignment == 0);
- XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
- args->len);
-#endif
+ xfs_alloc_vextent_set_fsbno(args, minimum_agno);
+ return error;
+}
+
+/*
+ * Iterate from the agno indicated from args->fsbno through to the end of the
+ * filesystem attempting blocking allocation. This does not wrap or try a second
+ * pass, so will not recurse into AGs lower than indicated by fsbno.
+ */
+static int
+xfs_alloc_vextent_first_ag(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t start_agno;
+ int error;
+
+ error = xfs_alloc_vextent_check_args(args);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
}
- /*
- * We end up here with a locked AGF. If we failed, the caller is likely
- * going to try to allocate again with different parameters, and that
- * can widen the AGs that are searched for free space. If we have to do
- * BMBT block allocation, we have to do a new allocation.
- *
- * Hence leaving this function with the AGF locked opens up potential
- * ABBA AGF deadlocks because a future allocation attempt in this
- * transaction may attempt to lock a lower number AGF.
- *
- * We can't release the AGF until the transaction is commited, so at
- * this point we must update the "firstblock" tracker to point at this
- * AG if the tracker is empty or points to a lower AG. This allows the
- * next allocation attempt to be modified appropriately to avoid
- * deadlocks.
- */
- if (args->agbp &&
- (args->tp->t_highest_agno == NULLAGNUMBER ||
- args->pag->pag_agno > minimum_agno))
- args->tp->t_highest_agno = args->pag->pag_agno;
- xfs_perag_put(args->pag);
- return 0;
-error0:
- xfs_perag_put(args->pag);
+ start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
+
+ args->type = XFS_ALLOCTYPE_THIS_AG;
+ error = xfs_alloc_vextent_iterate_ags(args, minimum_agno,
+ start_agno, 0);
+ xfs_alloc_vextent_set_fsbno(args, minimum_agno);
return error;
}
+/*
+ * Allocate an extent (variable-size).
+ * Depending on the allocation type, we either look in a single allocation
+ * group or loop over the allocation groups to find the result.
+ */
+int
+xfs_alloc_vextent(
+ struct xfs_alloc_arg *args)
+{
+ xfs_agnumber_t minimum_agno = 0;
+
+ if (args->tp->t_highest_agno != NULLAGNUMBER)
+ minimum_agno = args->tp->t_highest_agno;
+
+ switch (args->type) {
+ case XFS_ALLOCTYPE_THIS_AG:
+ case XFS_ALLOCTYPE_NEAR_BNO:
+ case XFS_ALLOCTYPE_THIS_BNO:
+ return xfs_alloc_vextent_this_ag(args, minimum_agno);
+ case XFS_ALLOCTYPE_START_BNO:
+ return xfs_alloc_vextent_start_ag(args, minimum_agno);
+ case XFS_ALLOCTYPE_FIRST_AG:
+ return xfs_alloc_vextent_first_ag(args, minimum_agno);
+ default:
+ ASSERT(0);
+ /* NOTREACHED */
+ }
+ /* Should never get here */
+ return -EFSCORRUPTED;
+}
+
/* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(