Diffstat (limited to 'fs/xfs/xfs_extent_busy.c')
-rw-r--r--	fs/xfs/xfs_extent_busy.c	437
1 file changed, 281 insertions(+), 156 deletions(-)
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 85e9f87a1a7c..da3161572735 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -1,74 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  * Copyright (c) 2010 David Chinner.
  * Copyright (c) 2011 Christoph Hellwig.
  * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_alloc.h"
-#include "xfs_inode.h"
 #include "xfs_extent_busy.h"
 #include "xfs_trace.h"
-
-void
-xfs_extent_busy_insert(
-	struct xfs_trans	*tp,
-	xfs_agnumber_t		agno,
+#include "xfs_trans.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
+
+struct xfs_extent_busy_tree {
+	spinlock_t		eb_lock;
+	struct rb_root		eb_tree;
+	unsigned int		eb_gen;
+	wait_queue_head_t	eb_wait;
+};
+
+static void
+xfs_extent_busy_insert_list(
+	struct xfs_group	*xg,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	unsigned int		flags)
+	unsigned int		flags,
+	struct list_head	*busy_list)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct xfs_extent_busy	*new;
 	struct xfs_extent_busy	*busyp;
-	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent = NULL;
 
-	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
-	if (!new) {
-		/*
-		 * No Memory!  Since it is now not possible to track the free
-		 * block, make this a synchronous transaction to insure that
-		 * the block is not reused before this transaction commits.
-		 */
-		trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
-		xfs_trans_set_sync(tp);
-		return;
-	}
-
-	new->agno = agno;
+	new = kzalloc(sizeof(struct xfs_extent_busy),
+			GFP_KERNEL | __GFP_NOFAIL);
+	new->group = xfs_group_hold(xg);
 	new->bno = bno;
 	new->length = len;
 	INIT_LIST_HEAD(&new->list);
 	new->flags = flags;
 
 	/* trace before insert to be able to see failed inserts */
-	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
+	trace_xfs_extent_busy(xg, bno, len);
 
-	pag = xfs_perag_get(tp->t_mountp, new->agno);
-	spin_lock(&pag->pagb_lock);
-	rbp = &pag->pagb_tree.rb_node;
+	spin_lock(&eb->eb_lock);
+	rbp = &eb->eb_tree.rb_node;
 	while (*rbp) {
 		parent = *rbp;
 		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
@@ -85,11 +70,33 @@ xfs_extent_busy_insert(
 	}
 
 	rb_link_node(&new->rb_node, parent, rbp);
-	rb_insert_color(&new->rb_node, &pag->pagb_tree);
+	rb_insert_color(&new->rb_node, &eb->eb_tree);
 
-	list_add(&new->list, &tp->t_busy);
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	/* always process discard lists in fifo order */
+	list_add_tail(&new->list, busy_list);
+	spin_unlock(&eb->eb_lock);
+}
+
+void
+xfs_extent_busy_insert(
+	struct xfs_trans	*tp,
+	struct xfs_group	*xg,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	unsigned int		flags)
+{
+	xfs_extent_busy_insert_list(xg, bno, len, flags, &tp->t_busy);
+}
+
+void
+xfs_extent_busy_insert_discard(
+	struct xfs_group	*xg,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct list_head	*busy_list)
+{
+	xfs_extent_busy_insert_list(xg, bno, len, XFS_EXTENT_BUSY_DISCARDED,
+			busy_list);
 }
 
 /*
@@ -103,22 +110,18 @@ xfs_extent_busy_insert(
  */
 int
 xfs_extent_busy_search(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
+	struct xfs_group	*xg,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len)
 {
-	struct xfs_perag	*pag;
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct rb_node		*rbp;
 	struct xfs_extent_busy	*busyp;
 	int			match = 0;
 
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-
-	rbp = pag->pagb_tree.rb_node;
-
 	/* find closest start bno overlap */
+	spin_lock(&eb->eb_lock);
+	rbp = eb->eb_tree.rb_node;
 	while (rbp) {
 		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		if (bno < busyp->bno) {
@@ -137,8 +140,7 @@ xfs_extent_busy_search(
 			break;
 		}
 	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	spin_unlock(&eb->eb_lock);
 	return match;
 }
 
@@ -147,7 +149,7 @@ xfs_extent_busy_search(
  * extent.  If the overlap covers the beginning, the end, or all of the busy
  * extent, the overlapping portion can be made unbusy and used for the
  * allocation.  We can't split a busy extent because we can't modify a
- * transaction/CIL context busy list, but we can update an entries block
+ * transaction/CIL context busy list, but we can update an entry's block
 * number or length.
 *
 * Returns true if the extent can safely be reused, or false if the search
@@ -155,13 +157,15 @@ xfs_extent_busy_search(
 */
 STATIC bool
 xfs_extent_busy_update_extent(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
+	struct xfs_group	*xg,
 	struct xfs_extent_busy	*busyp,
 	xfs_agblock_t		fbno,
 	xfs_extlen_t		flen,
 	bool			userdata)
+		__releases(&eb->eb_lock)
+		__acquires(&eb->eb_lock)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	xfs_agblock_t		fend = fbno + flen;
 	xfs_agblock_t		bbno = busyp->bno;
 	xfs_agblock_t		bend = bbno + busyp->length;
@@ -172,9 +176,9 @@ xfs_extent_busy_update_extent(
 	 * and retry.
 	 */
 	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
-		spin_unlock(&pag->pagb_lock);
+		spin_unlock(&eb->eb_lock);
 		delay(1);
-		spin_lock(&pag->pagb_lock);
+		spin_lock(&eb->eb_lock);
 		return false;
 	}
 
@@ -247,7 +251,7 @@ xfs_extent_busy_update_extent(
 		 * tree root, because erasing the node can rearrange the
 		 * tree topology.
 		 */
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+		rb_erase(&busyp->rb_node, &eb->eb_tree);
 		busyp->length = 0;
 		return false;
 	} else if (fend < bend) {
@@ -266,6 +270,7 @@ xfs_extent_busy_update_extent(
 		 *
 		 */
 		busyp->bno = fend;
+		busyp->length = bend - fend;
 	} else if (bbno < fbno) {
 		/*
 		 * Case 8:
@@ -285,38 +290,34 @@ xfs_extent_busy_update_extent(
 		ASSERT(0);
 	}
 
-	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
+	trace_xfs_extent_busy_reuse(xg, fbno, flen);
 	return true;
 
 out_force_log:
-	spin_unlock(&pag->pagb_lock);
-	xfs_log_force(mp, XFS_LOG_SYNC);
-	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
-	spin_lock(&pag->pagb_lock);
+	spin_unlock(&eb->eb_lock);
+	xfs_log_force(xg->xg_mount, XFS_LOG_SYNC);
+	trace_xfs_extent_busy_force(xg, fbno, flen);
+	spin_lock(&eb->eb_lock);
 	return false;
 }
 
-
 /*
 * For a given extent [fbno, flen], make sure we can reuse it safely.
 */
 void
 xfs_extent_busy_reuse(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
+	struct xfs_group	*xg,
 	xfs_agblock_t		fbno,
 	xfs_extlen_t		flen,
 	bool			userdata)
 {
-	struct xfs_perag	*pag;
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct rb_node		*rbp;
 
 	ASSERT(flen > 0);
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock(&eb->eb_lock);
 restart:
-	rbp = pag->pagb_tree.rb_node;
+	rbp = eb->eb_tree.rb_node;
 	while (rbp) {
 		struct xfs_extent_busy *busyp =
 			rb_entry(rbp, struct xfs_extent_busy, rb_node);
@@ -331,12 +332,11 @@ restart:
 			continue;
 		}
 
-		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
+		if (!xfs_extent_busy_update_extent(xg, busyp, fbno, flen,
 				userdata))
 			goto restart;
 	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	spin_unlock(&eb->eb_lock);
 }
 
 /*
@@ -344,27 +344,35 @@ restart:
 * subset of the extent that is not busy.  If *rlen is smaller than
 * args->minlen no suitable extent could be found, and the higher level
 * code needs to force out the log and retry the allocation.
+ *
+ * Return the current busy generation for the group if the extent is busy. This
+ * value can be used to wait for at least one of the currently busy extents
+ * to be cleared. Note that the busy list is not guaranteed to be empty after
+ * the gen is woken. The state of a specific extent must always be confirmed
+ * with another call to xfs_extent_busy_trim() before it can be used.
 */
-void
+bool
 xfs_extent_busy_trim(
-	struct xfs_alloc_arg	*args,
-	xfs_agblock_t		bno,
-	xfs_extlen_t		len,
-	xfs_agblock_t		*rbno,
-	xfs_extlen_t		*rlen)
+	struct xfs_group	*xg,
+	xfs_extlen_t		minlen,
+	xfs_extlen_t		maxlen,
+	xfs_agblock_t		*bno,
+	xfs_extlen_t		*len,
+	unsigned		*busy_gen)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	xfs_agblock_t		fbno;
 	xfs_extlen_t		flen;
 	struct rb_node		*rbp;
+	bool			ret = false;
 
-	ASSERT(len > 0);
+	ASSERT(*len > 0);
 
-	spin_lock(&args->pag->pagb_lock);
-restart:
-	fbno = bno;
-	flen = len;
-	rbp = args->pag->pagb_tree.rb_node;
-	while (rbp && flen >= args->minlen) {
+	spin_lock(&eb->eb_lock);
+	fbno = *bno;
+	flen = *len;
+	rbp = eb->eb_tree.rb_node;
+	while (rbp && flen >= minlen) {
 		struct xfs_extent_busy *busyp =
 			rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		xfs_agblock_t	fend = fbno + flen;
@@ -379,19 +387,6 @@ restart:
 			continue;
 		}
 
-		/*
-		 * If this is a metadata allocation, try to reuse the busy
-		 * extent instead of trimming the allocation.
-		 */
-		if (!args->userdata &&
-		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
-			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
-							  busyp, fbno, flen,
-							  false))
-				goto restart;
-			continue;
-		}
-
 		if (bbno <= fbno) {
 			/* start overlap */
 
@@ -498,13 +493,13 @@ restart:
 			 * good chance subsequent allocations will be
 			 * contiguous.
 			 */
-			if (bbno - fbno >= args->maxlen) {
+			if (bbno - fbno >= maxlen) {
 				/* left candidate fits perfect */
 				fend = bbno;
-			} else if (fend - bend >= args->maxlen * 4) {
+			} else if (fend - bend >= maxlen * 4) {
 				/* right candidate has enough free space */
 				fbno = bend;
-			} else if (bbno - fbno >= args->minlen) {
+			} else if (bbno - fbno >= minlen) {
 				/* left candidate fits minimum requirement */
 				fend = bbno;
 			} else {
@@ -514,40 +509,48 @@ restart:
 		}
 		flen = fend - fbno;
 	}
-	spin_unlock(&args->pag->pagb_lock);
-
-	if (fbno != bno || flen != len) {
-		trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
-					  fbno, flen);
+out:
+
+	if (fbno != *bno || flen != *len) {
+		trace_xfs_extent_busy_trim(xg, *bno, *len, fbno, flen);
+		*bno = fbno;
+		*len = flen;
+		*busy_gen = eb->eb_gen;
+		ret = true;
 	}
-	*rbno = fbno;
-	*rlen = flen;
-	return;
+	spin_unlock(&eb->eb_lock);
+	return ret;
 fail:
 	/*
 	 * Return a zero extent length as failure indications.  All callers
 	 * re-check if the trimmed extent satisfies the minlen requirement.
 	 */
-	spin_unlock(&args->pag->pagb_lock);
-	trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
-	*rbno = fbno;
-	*rlen = 0;
+	flen = 0;
+	goto out;
 }
 
-STATIC void
+static bool
 xfs_extent_busy_clear_one(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_extent_busy	*busyp)
+	struct xfs_extent_busy	*busyp,
+	bool			do_discard)
 {
+	struct xfs_extent_busy_tree *eb = busyp->group->xg_busy_extents;
+
 	if (busyp->length) {
-		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
-						busyp->length);
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+		if (do_discard &&
+		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
+			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
+			return false;
+		}
+		trace_xfs_extent_busy_clear(busyp->group, busyp->bno,
+				busyp->length);
+		rb_erase(&busyp->rb_node, &eb->eb_tree);
 	}
 
 	list_del_init(&busyp->list);
-	kmem_free(busyp);
+	xfs_group_put(busyp->group);
+	kfree(busyp);
+	return true;
 }
 
 /*
@@ -557,47 +560,169 @@ xfs_extent_busy_clear_one(
 */
 void
 xfs_extent_busy_clear(
-	struct xfs_mount	*mp,
 	struct list_head	*list,
 	bool			do_discard)
 {
-	struct xfs_extent_busy	*busyp, *n;
-	struct xfs_perag	*pag = NULL;
-	xfs_agnumber_t		agno = NULLAGNUMBER;
+	struct xfs_extent_busy	*busyp, *next;
 
-	list_for_each_entry_safe(busyp, n, list, list) {
-		if (busyp->agno != agno) {
-			if (pag) {
-				spin_unlock(&pag->pagb_lock);
-				xfs_perag_put(pag);
-			}
-			pag = xfs_perag_get(mp, busyp->agno);
-			spin_lock(&pag->pagb_lock);
-			agno = busyp->agno;
+	busyp = list_first_entry_or_null(list, typeof(*busyp), list);
+	if (!busyp)
+		return;
+
+	do {
+		struct xfs_group	*xg = xfs_group_hold(busyp->group);
+		struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+		bool			wakeup = false;
+
+		spin_lock(&eb->eb_lock);
+		do {
+			next = list_next_entry(busyp, list);
+			if (xfs_extent_busy_clear_one(busyp, do_discard))
+				wakeup = true;
+			busyp = next;
+		} while (!list_entry_is_head(busyp, list, list) &&
+			 busyp->group == xg);
+
+		if (wakeup) {
+			eb->eb_gen++;
+			wake_up_all(&eb->eb_wait);
 		}
+		spin_unlock(&eb->eb_lock);
+		xfs_group_put(xg);
+	} while (!list_entry_is_head(busyp, list, list));
+}
-
-		if (do_discard && busyp->length &&
-		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
-			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
-		else
-			xfs_extent_busy_clear_one(mp, pag, busyp);
-	}
+
+/*
+ * Flush out all busy extents for this group.
+ *
+ * If the current transaction is holding busy extents, the caller may not want
+ * to wait for committed busy extents to resolve. If we are being told just to
+ * try a flush or progress has been made since we last skipped a busy extent,
+ * return immediately to allow the caller to try again.
+ *
+ * If we are freeing extents, we might actually be holding the only free extents
+ * in the transaction busy list and the log force won't resolve that situation.
+ * In this case, we must return -EAGAIN to avoid a deadlock by informing the
+ * caller it needs to commit the busy extents it holds before retrying the
+ * extent free operation.
+ */
+int
+xfs_extent_busy_flush(
+	struct xfs_trans	*tp,
+	struct xfs_group	*xg,
+	unsigned		busy_gen,
+	uint32_t		alloc_flags)
+{
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+	DEFINE_WAIT		(wait);
+	int			error;
+
+	error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
+	if (error)
+		return error;
 
-	if (pag) {
-		spin_unlock(&pag->pagb_lock);
-		xfs_perag_put(pag);
+	/* Avoid deadlocks on uncommitted busy extents. */
+	if (!list_empty(&tp->t_busy)) {
+		if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
+			return 0;
+
+		if (busy_gen != READ_ONCE(eb->eb_gen))
+			return 0;
+
+		if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
+			return -EAGAIN;
 	}
+
+	/* Wait for committed busy extents to resolve. */
+	do {
+		prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+		if (busy_gen != READ_ONCE(eb->eb_gen))
+			break;
+		schedule();
+	} while (1);
+
+	finish_wait(&eb->eb_wait, &wait);
+	return 0;
+}
+
+static void
+xfs_extent_busy_wait_group(
+	struct xfs_group	*xg)
+{
+	DEFINE_WAIT		(wait);
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+
+	do {
+		prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+		if (RB_EMPTY_ROOT(&eb->eb_tree))
+			break;
+		schedule();
+	} while (1);
+	finish_wait(&eb->eb_wait, &wait);
+}
+
+void
+xfs_extent_busy_wait_all(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag = NULL;
+	struct xfs_rtgroup	*rtg = NULL;
+
+	while ((pag = xfs_perag_next(mp, pag)))
+		xfs_extent_busy_wait_group(pag_group(pag));
+
+	if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp))
+		while ((rtg = xfs_rtgroup_next(mp, rtg)))
+			xfs_extent_busy_wait_group(rtg_group(rtg));
 }
 
 /*
- * Callback for list_sort to sort busy extents by the AG they reside in.
+ * Callback for list_sort to sort busy extents by the group they reside in.
 */
 int
 xfs_extent_busy_ag_cmp(
 	void			*priv,
-	struct list_head	*a,
-	struct list_head	*b)
+	const struct list_head	*l1,
+	const struct list_head	*l2)
+{
+	struct xfs_extent_busy	*b1 =
+		container_of(l1, struct xfs_extent_busy, list);
+	struct xfs_extent_busy	*b2 =
+		container_of(l2, struct xfs_extent_busy, list);
+	s32			diff;
+
+	diff = b1->group->xg_gno - b2->group->xg_gno;
+	if (!diff)
+		diff = b1->bno - b2->bno;
+	return diff;
+}
+
+/* Are there any busy extents in this group? */
+bool
+xfs_extent_busy_list_empty(
+	struct xfs_group	*xg,
+	unsigned		*busy_gen)
+{
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+	bool			res;
+
+	spin_lock(&eb->eb_lock);
+	res = RB_EMPTY_ROOT(&eb->eb_tree);
+	*busy_gen = READ_ONCE(eb->eb_gen);
+	spin_unlock(&eb->eb_lock);
+	return res;
+}
+
+struct xfs_extent_busy_tree *
+xfs_extent_busy_alloc(void)
 {
-	return container_of(a, struct xfs_extent_busy, list)->agno -
-	       container_of(b, struct xfs_extent_busy, list)->agno;
+	struct xfs_extent_busy_tree *eb;
+
+	eb = kzalloc(sizeof(*eb), GFP_KERNEL);
+	if (!eb)
+		return NULL;
+	spin_lock_init(&eb->eb_lock);
+	init_waitqueue_head(&eb->eb_wait);
+	eb->eb_tree = RB_ROOT;
+	return eb;
 }
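The reworked xfs_extent_busy_trim() / xfs_extent_busy_flush() pair above is driven by the per-group busy generation (eb_gen). The following is an illustrative sketch only, not part of the patch: the helper name example_trim_or_wait() is invented, and a real allocator caller restarts its search rather than looping in place, but it shows how the generation sampled by the trim is meant to be handed to the flush/wait side.

static int
example_trim_or_wait(
	struct xfs_trans	*tp,
	struct xfs_group	*xg,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	xfs_extlen_t		minlen,
	xfs_extlen_t		maxlen,
	uint32_t		alloc_flags)
{
	unsigned		busy_gen;
	int			error;

	for (;;) {
		xfs_agblock_t	tbno = *bno;
		xfs_extlen_t	tlen = *len;

		/*
		 * xfs_extent_busy_trim() returns true and samples eb_gen only
		 * when the candidate extent was shortened by a busy overlap.
		 */
		if (!xfs_extent_busy_trim(xg, minlen, maxlen, &tbno, &tlen,
				&busy_gen) || tlen >= minlen) {
			*bno = tbno;
			*len = tlen;
			return 0;	/* usable (possibly trimmed) extent */
		}

		/*
		 * Too small to use: force the log and wait for eb_gen to move
		 * past busy_gen.  With XFS_ALLOC_FLAG_TRYFLUSH or
		 * XFS_ALLOC_FLAG_FREEING set this returns without sleeping
		 * (0 or -EAGAIN), and a real caller would back out of the
		 * group instead of retrying here.
		 */
		error = xfs_extent_busy_flush(tp, xg, busy_gen, alloc_flags);
		if (error)
			return error;
	}
}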
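xfs_extent_busy_clear() now resolves a list in per-group batches: it takes eb_lock once per run of entries that share a group, bumps eb_gen, and wakes eb_wait. Sorting the list with the list_sort() comparator xfs_extent_busy_ag_cmp() (group number first, then start block) maximises those batches. Below is a minimal sketch of that pairing with an invented wrapper name; the checkpoint-completion side of the log does something along these lines when it resolves the busy extents of a committed checkpoint.

#include <linux/list_sort.h>

static void
example_resolve_busy_list(
	struct list_head	*busy_list,
	bool			do_discard)
{
	/*
	 * Sort by group so xfs_extent_busy_clear() only acquires each
	 * group's eb_lock once, then clear the whole list (or hand the
	 * entries over to discard processing when do_discard is set).
	 */
	list_sort(NULL, busy_list, xfs_extent_busy_ag_cmp);
	xfs_extent_busy_clear(busy_list, do_discard);
}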
