Diffstat (limited to 'fs/xfs/xfs_extent_busy.c')
-rw-r--r--	fs/xfs/xfs_extent_busy.c | 373
1 file changed, 221 insertions(+), 152 deletions(-)
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 0ed68379e551..da3161572735 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -11,41 +11,49 @@
 #include "xfs_log_format.h"
 #include "xfs_shared.h"
 #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
 #include "xfs_trace.h"
 #include "xfs_trans.h"
 #include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
 
-void
-xfs_extent_busy_insert(
-	struct xfs_trans	*tp,
-	xfs_agnumber_t		agno,
+struct xfs_extent_busy_tree {
+	spinlock_t		eb_lock;
+	struct rb_root		eb_tree;
+	unsigned int		eb_gen;
+	wait_queue_head_t	eb_wait;
+};
+
+static void
+xfs_extent_busy_insert_list(
+	struct xfs_group	*xg,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	unsigned int		flags)
+	unsigned int		flags,
+	struct list_head	*busy_list)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct xfs_extent_busy	*new;
 	struct xfs_extent_busy	*busyp;
-	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent = NULL;
 
-	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
-	new->agno = agno;
+	new = kzalloc(sizeof(struct xfs_extent_busy),
+			GFP_KERNEL | __GFP_NOFAIL);
+	new->group = xfs_group_hold(xg);
 	new->bno = bno;
 	new->length = len;
 	INIT_LIST_HEAD(&new->list);
 	new->flags = flags;
 
 	/* trace before insert to be able to see failed inserts */
-	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
+	trace_xfs_extent_busy(xg, bno, len);
 
-	pag = xfs_perag_get(tp->t_mountp, new->agno);
-	spin_lock(&pag->pagb_lock);
-	rbp = &pag->pagb_tree.rb_node;
+	spin_lock(&eb->eb_lock);
+	rbp = &eb->eb_tree.rb_node;
 	while (*rbp) {
 		parent = *rbp;
 		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
@@ -62,11 +70,33 @@ xfs_extent_busy_insert(
 	}
 
 	rb_link_node(&new->rb_node, parent, rbp);
-	rb_insert_color(&new->rb_node, &pag->pagb_tree);
+	rb_insert_color(&new->rb_node, &eb->eb_tree);
 
-	list_add(&new->list, &tp->t_busy);
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	/* always process discard lists in fifo order */
+	list_add_tail(&new->list, busy_list);
+	spin_unlock(&eb->eb_lock);
+}
+
+void
+xfs_extent_busy_insert(
+	struct xfs_trans	*tp,
+	struct xfs_group	*xg,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	unsigned int		flags)
+{
+	xfs_extent_busy_insert_list(xg, bno, len, flags, &tp->t_busy);
+}
+
+void
+xfs_extent_busy_insert_discard(
+	struct xfs_group	*xg,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct list_head	*busy_list)
+{
+	xfs_extent_busy_insert_list(xg, bno, len, XFS_EXTENT_BUSY_DISCARDED,
+			busy_list);
 }
 
 /*
@@ -80,22 +110,18 @@ xfs_extent_busy_insert(
  */
 int
 xfs_extent_busy_search(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
+	struct xfs_group	*xg,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len)
 {
-	struct xfs_perag	*pag;
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct rb_node		*rbp;
 	struct xfs_extent_busy	*busyp;
 	int			match = 0;
 
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
-
-	rbp = pag->pagb_tree.rb_node;
-
 	/* find closest start bno overlap */
+	spin_lock(&eb->eb_lock);
+	rbp = eb->eb_tree.rb_node;
 	while (rbp) {
 		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		if (bno < busyp->bno) {
@@ -114,8 +140,7 @@ xfs_extent_busy_search(
 			break;
 		}
 	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	spin_unlock(&eb->eb_lock);
 	return match;
 }
 
@@ -132,14 +157,15 @@ xfs_extent_busy_search(
  */
STATIC bool
 xfs_extent_busy_update_extent(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
+	struct xfs_group	*xg,
 	struct xfs_extent_busy	*busyp,
 	xfs_agblock_t		fbno,
 	xfs_extlen_t		flen,
-	bool			userdata) __releases(&pag->pagb_lock)
-					  __acquires(&pag->pagb_lock)
+	bool			userdata)
+		__releases(&eb->eb_lock)
+		__acquires(&eb->eb_lock)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	xfs_agblock_t		fend = fbno + flen;
 	xfs_agblock_t		bbno = busyp->bno;
 	xfs_agblock_t		bend = bbno + busyp->length;
@@ -150,9 +176,9 @@ xfs_extent_busy_update_extent(
 	 * and retry.
 	 */
 	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
-		spin_unlock(&pag->pagb_lock);
+		spin_unlock(&eb->eb_lock);
 		delay(1);
-		spin_lock(&pag->pagb_lock);
+		spin_lock(&eb->eb_lock);
 		return false;
 	}
 
@@ -225,7 +251,7 @@ xfs_extent_busy_update_extent(
 		 * tree root, because erasing the node can rearrange the
 		 * tree topology.
 		 */
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+		rb_erase(&busyp->rb_node, &eb->eb_tree);
 		busyp->length = 0;
 		return false;
 	} else if (fend < bend) {
@@ -244,6 +270,7 @@ xfs_extent_busy_update_extent(
 		 *
 		 */
 		busyp->bno = fend;
+		busyp->length = bend - fend;
 	} else if (bbno < fbno) {
 		/*
 		 * Case 8:
@@ -263,38 +290,34 @@ xfs_extent_busy_update_extent(
 		ASSERT(0);
 	}
 
-	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
+	trace_xfs_extent_busy_reuse(xg, fbno, flen);
 	return true;
 
 out_force_log:
-	spin_unlock(&pag->pagb_lock);
-	xfs_log_force(mp, XFS_LOG_SYNC);
-	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
-	spin_lock(&pag->pagb_lock);
+	spin_unlock(&eb->eb_lock);
+	xfs_log_force(xg->xg_mount, XFS_LOG_SYNC);
+	trace_xfs_extent_busy_force(xg, fbno, flen);
+	spin_lock(&eb->eb_lock);
 	return false;
 }
 
-
 /*
  * For a given extent [fbno, flen], make sure we can reuse it safely.
  */
 void
 xfs_extent_busy_reuse(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
+	struct xfs_group	*xg,
 	xfs_agblock_t		fbno,
 	xfs_extlen_t		flen,
 	bool			userdata)
 {
-	struct xfs_perag	*pag;
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	struct rb_node		*rbp;
 
 	ASSERT(flen > 0);
-
-	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock(&eb->eb_lock);
restart:
-	rbp = pag->pagb_tree.rb_node;
+	rbp = eb->eb_tree.rb_node;
 	while (rbp) {
 		struct xfs_extent_busy *busyp =
 			rb_entry(rbp, struct xfs_extent_busy, rb_node);
@@ -309,12 +332,11 @@ restart:
 			continue;
 		}
 
-		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
+		if (!xfs_extent_busy_update_extent(xg, busyp, fbno, flen,
 				userdata))
 			goto restart;
 	}
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	spin_unlock(&eb->eb_lock);
 }
 
 /*
@@ -323,7 +345,7 @@ restart:
  * args->minlen no suitable extent could be found, and the higher level
  * code needs to force out the log and retry the allocation.
  *
- * Return the current busy generation for the AG if the extent is busy. This
+ * Return the current busy generation for the group if the extent is busy. This
  * value can be used to wait for at least one of the currently busy extents
  * to be cleared. Note that the busy list is not guaranteed to be empty after
  * the gen is woken. The state of a specific extent must always be confirmed
@@ -331,11 +353,14 @@ restart:
  */
 bool
 xfs_extent_busy_trim(
-	struct xfs_alloc_arg	*args,
+	struct xfs_group	*xg,
+	xfs_extlen_t		minlen,
+	xfs_extlen_t		maxlen,
 	xfs_agblock_t		*bno,
 	xfs_extlen_t		*len,
 	unsigned		*busy_gen)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	xfs_agblock_t		fbno;
 	xfs_extlen_t		flen;
 	struct rb_node		*rbp;
@@ -343,12 +368,11 @@ xfs_extent_busy_trim(
 
 	ASSERT(*len > 0);
 
-	spin_lock(&args->pag->pagb_lock);
-restart:
+	spin_lock(&eb->eb_lock);
 	fbno = *bno;
 	flen = *len;
-	rbp = args->pag->pagb_tree.rb_node;
-	while (rbp && flen >= args->minlen) {
+	rbp = eb->eb_tree.rb_node;
+	while (rbp && flen >= minlen) {
 		struct xfs_extent_busy *busyp =
 			rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		xfs_agblock_t	fend = fbno + flen;
@@ -363,19 +387,6 @@ restart:
 			continue;
 		}
 
-		/*
-		 * If this is a metadata allocation, try to reuse the busy
-		 * extent instead of trimming the allocation.
-		 */
-		if (!xfs_alloc_is_userdata(args->datatype) &&
-		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
-			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
-							  busyp, fbno, flen,
-							  false))
-				goto restart;
-			continue;
-		}
-
 		if (bbno <= fbno) {
 			/* start overlap */
@@ -482,13 +493,13 @@ restart:
 			 * good chance subsequent allocations will be
 			 * contiguous.
 			 */
-			if (bbno - fbno >= args->maxlen) {
+			if (bbno - fbno >= maxlen) {
 				/* left candidate fits perfect */
 				fend = bbno;
-			} else if (fend - bend >= args->maxlen * 4) {
+			} else if (fend - bend >= maxlen * 4) {
 				/* right candidate has enough free space */
 				fbno = bend;
-			} else if (bbno - fbno >= args->minlen) {
+			} else if (bbno - fbno >= minlen) {
 				/* left candidate fits minimum requirement */
 				fend = bbno;
 			} else {
@@ -501,14 +512,13 @@ restart:
out:
 
 	if (fbno != *bno || flen != *len) {
-		trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
-					  fbno, flen);
+		trace_xfs_extent_busy_trim(xg, *bno, *len, fbno, flen);
 		*bno = fbno;
 		*len = flen;
-		*busy_gen = args->pag->pagb_gen;
+		*busy_gen = eb->eb_gen;
 		ret = true;
 	}
-	spin_unlock(&args->pag->pagb_lock);
+	spin_unlock(&eb->eb_lock);
 	return ret;
fail:
 	/*
@@ -519,35 +529,28 @@ fail:
 	goto out;
 }
 
-STATIC void
+static bool
 xfs_extent_busy_clear_one(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	struct xfs_extent_busy	*busyp)
+	struct xfs_extent_busy	*busyp,
+	bool			do_discard)
 {
+	struct xfs_extent_busy_tree *eb = busyp->group->xg_busy_extents;
+
 	if (busyp->length) {
-		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
-						busyp->length);
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+		if (do_discard &&
+		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
+			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
+			return false;
+		}
+		trace_xfs_extent_busy_clear(busyp->group, busyp->bno,
+				busyp->length);
+		rb_erase(&busyp->rb_node, &eb->eb_tree);
 	}
 	list_del_init(&busyp->list);
-	kmem_free(busyp);
-}
-
-static void
-xfs_extent_busy_put_pag(
-	struct xfs_perag	*pag,
-	bool			wakeup)
-		__releases(pag->pagb_lock)
-{
-	if (wakeup) {
-		pag->pagb_gen++;
-		wake_up_all(&pag->pagb_wait);
-	}
-
-	spin_unlock(&pag->pagb_lock);
-	xfs_perag_put(pag);
+	xfs_group_put(busyp->group);
+	kfree(busyp);
+	return true;
 }
 
 /*
@@ -557,94 +560,130 @@ xfs_extent_busy_put_pag(
  */
 void
 xfs_extent_busy_clear(
-	struct xfs_mount	*mp,
 	struct list_head	*list,
 	bool			do_discard)
 {
-	struct xfs_extent_busy	*busyp, *n;
-	struct xfs_perag	*pag = NULL;
-	xfs_agnumber_t		agno = NULLAGNUMBER;
-	bool			wakeup = false;
-
-	list_for_each_entry_safe(busyp, n, list, list) {
-		if (busyp->agno != agno) {
-			if (pag)
-				xfs_extent_busy_put_pag(pag, wakeup);
-			agno = busyp->agno;
-			pag = xfs_perag_get(mp, agno);
-			spin_lock(&pag->pagb_lock);
-			wakeup = false;
-		}
+	struct xfs_extent_busy	*busyp, *next;
 
-		if (do_discard && busyp->length &&
-		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
-			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
-		} else {
-			xfs_extent_busy_clear_one(mp, pag, busyp);
-			wakeup = true;
-		}
-	}
+	busyp = list_first_entry_or_null(list, typeof(*busyp), list);
+	if (!busyp)
+		return;
 
-	if (pag)
-		xfs_extent_busy_put_pag(pag, wakeup);
+	do {
+		struct xfs_group	*xg = xfs_group_hold(busyp->group);
+		struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+		bool			wakeup = false;
+
+		spin_lock(&eb->eb_lock);
+		do {
+			next = list_next_entry(busyp, list);
+			if (xfs_extent_busy_clear_one(busyp, do_discard))
+				wakeup = true;
+			busyp = next;
+		} while (!list_entry_is_head(busyp, list, list) &&
+			 busyp->group == xg);
+
+		if (wakeup) {
+			eb->eb_gen++;
+			wake_up_all(&eb->eb_wait);
+		}
+		spin_unlock(&eb->eb_lock);
+		xfs_group_put(xg);
+	} while (!list_entry_is_head(busyp, list, list));
 }
 
 /*
- * Flush out all busy extents for this AG.
+ * Flush out all busy extents for this group.
+ *
+ * If the current transaction is holding busy extents, the caller may not want
+ * to wait for committed busy extents to resolve. If we are being told just to
+ * try a flush or progress has been made since we last skipped a busy extent,
+ * return immediately to allow the caller to try again.
+ *
+ * If we are freeing extents, we might actually be holding the only free
+ * extents in the transaction busy list and the log force won't resolve that
+ * situation. In this case, we must return -EAGAIN to avoid a deadlock by
+ * informing the caller it needs to commit the busy extents it holds before
+ * retrying the extent free operation.
  */
-void
+int
 xfs_extent_busy_flush(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	unsigned		busy_gen)
+	struct xfs_trans	*tp,
+	struct xfs_group	*xg,
+	unsigned		busy_gen,
+	uint32_t		alloc_flags)
 {
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
 	DEFINE_WAIT		(wait);
 	int			error;
 
-	error = xfs_log_force(mp, XFS_LOG_SYNC);
+	error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
 	if (error)
-		return;
+		return error;
+
+	/* Avoid deadlocks on uncommitted busy extents. */
+	if (!list_empty(&tp->t_busy)) {
+		if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
+			return 0;
+
+		if (busy_gen != READ_ONCE(eb->eb_gen))
+			return 0;
+
+		if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
+			return -EAGAIN;
+	}
 
+	/* Wait for committed busy extents to resolve. */
 	do {
-		prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
-		if (busy_gen != READ_ONCE(pag->pagb_gen))
+		prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+		if (busy_gen != READ_ONCE(eb->eb_gen))
 			break;
 		schedule();
 	} while (1);
 
-	finish_wait(&pag->pagb_wait, &wait);
+	finish_wait(&eb->eb_wait, &wait);
+	return 0;
+}
+
+static void
+xfs_extent_busy_wait_group(
+	struct xfs_group	*xg)
+{
+	DEFINE_WAIT		(wait);
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+
+	do {
+		prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+		if (RB_EMPTY_ROOT(&eb->eb_tree))
+			break;
+		schedule();
+	} while (1);
+	finish_wait(&eb->eb_wait, &wait);
 }
 
 void
 xfs_extent_busy_wait_all(
 	struct xfs_mount	*mp)
 {
-	DEFINE_WAIT		(wait);
-	xfs_agnumber_t		agno;
+	struct xfs_perag	*pag = NULL;
+	struct xfs_rtgroup	*rtg = NULL;
 
-	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-		struct xfs_perag *pag = xfs_perag_get(mp, agno);
+	while ((pag = xfs_perag_next(mp, pag)))
+		xfs_extent_busy_wait_group(pag_group(pag));
 
-		do {
-			prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
-			if (RB_EMPTY_ROOT(&pag->pagb_tree))
-				break;
-			schedule();
-		} while (1);
-		finish_wait(&pag->pagb_wait, &wait);
-
-		xfs_perag_put(pag);
-	}
+	if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp))
+		while ((rtg = xfs_rtgroup_next(mp, rtg)))
+			xfs_extent_busy_wait_group(rtg_group(rtg));
 }
 
 /*
- * Callback for list_sort to sort busy extents by the AG they reside in.
+ * Callback for list_sort to sort busy extents by the group they reside in.
  */
 int
 xfs_extent_busy_ag_cmp(
 	void			*priv,
-	struct list_head	*l1,
-	struct list_head	*l2)
+	const struct list_head	*l1,
+	const struct list_head	*l2)
 {
 	struct xfs_extent_busy	*b1 =
 		container_of(l1, struct xfs_extent_busy, list);
@@ -652,8 +691,38 @@ xfs_extent_busy_ag_cmp(
 		container_of(l2, struct xfs_extent_busy, list);
 	s32 diff;
 
-	diff = b1->agno - b2->agno;
+	diff = b1->group->xg_gno - b2->group->xg_gno;
 	if (!diff)
 		diff = b1->bno - b2->bno;
 	return diff;
 }
+
+/* Are there any busy extents in this group? */
+bool
+xfs_extent_busy_list_empty(
+	struct xfs_group	*xg,
+	unsigned		*busy_gen)
+{
+	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+	bool			res;
+
+	spin_lock(&eb->eb_lock);
+	res = RB_EMPTY_ROOT(&eb->eb_tree);
+	*busy_gen = READ_ONCE(eb->eb_gen);
+	spin_unlock(&eb->eb_lock);
+	return res;
+}
+
+struct xfs_extent_busy_tree *
+xfs_extent_busy_alloc(void)
+{
+	struct xfs_extent_busy_tree	*eb;
+
+	eb = kzalloc(sizeof(*eb), GFP_KERNEL);
+	if (!eb)
+		return NULL;
+	spin_lock_init(&eb->eb_lock);
+	init_waitqueue_head(&eb->eb_wait);
+	eb->eb_tree = RB_ROOT;
+	return eb;
+}
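
The eb_gen/eb_wait pair introduced above is a generation-count wakeup: whoever clears busy extents bumps eb_gen under eb_lock and wakes all sleepers, while a waiter that snapshotted the generation earlier (via xfs_extent_busy_list_empty() or xfs_extent_busy_trim()) sleeps only until the counter moves past its snapshot. As the comment in the diff stresses, a wakeup does not mean the tree is empty, only that at least one extent was cleared, so callers must recheck the specific extent they care about. The following standalone C sketch models that protocol with POSIX threads; struct busy_tree, clearer() and every other name here are illustrative stand-ins, not kernel or XFS API.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Userspace stand-in for struct xfs_extent_busy_tree. */
struct busy_tree {
	pthread_mutex_t	lock;		/* models eb_lock */
	pthread_cond_t	wait;		/* models eb_wait */
	unsigned	gen;		/* models eb_gen */
	int		nr_busy;	/* stands in for the rb-tree population */
};

static struct busy_tree bt = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.wait = PTHREAD_COND_INITIALIZER,
	.nr_busy = 1,
};

/* Models xfs_extent_busy_clear(): drop an extent, bump gen, wake everyone. */
static void *clearer(void *arg)
{
	sleep(1);			/* pretend the log commit takes time */
	pthread_mutex_lock(&bt.lock);
	bt.nr_busy--;
	bt.gen++;			/* eb_gen++ */
	pthread_cond_broadcast(&bt.wait); /* wake_up_all(&eb->eb_wait) */
	pthread_mutex_unlock(&bt.lock);
	return NULL;
}

int main(void)
{
	pthread_t	tid;
	unsigned	busy_gen;

	/* Snapshot the generation, as xfs_extent_busy_list_empty() does. */
	pthread_mutex_lock(&bt.lock);
	busy_gen = bt.gen;
	pthread_mutex_unlock(&bt.lock);

	pthread_create(&tid, NULL, clearer, NULL);

	/*
	 * Sleep until the generation moves past our snapshot, like the wait
	 * loop in xfs_extent_busy_flush(). A generation change only proves
	 * that *some* extent cleared, not that the tree is empty.
	 */
	pthread_mutex_lock(&bt.lock);
	while (busy_gen == bt.gen)
		pthread_cond_wait(&bt.wait, &bt.lock);
	pthread_mutex_unlock(&bt.lock);

	pthread_join(tid, NULL);
	printf("busy generation advanced to %u\n", bt.gen);
	return 0;
}

The counter, rather than a plain flag, is what makes the "has anything changed since I looked?" test race-free: the snapshot is taken under the same lock that protects the tree, so a clear racing with the snapshot can never be missed; it simply shows up as an already-advanced generation.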

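The deadlock-avoidance branch added to xfs_extent_busy_flush() encodes a small decision table: a transaction holding no busy extents may always block; otherwise a XFS_ALLOC_FLAG_TRYFLUSH caller returns at once, a changed generation means progress was already made so an immediate retry suffices, and a XFS_ALLOC_FLAG_FREEING caller gets -EAGAIN because the free space it is waiting for may be exactly what sits uncommitted in its own t_busy list. Here is a hedged, self-contained C model of that table; the FLAG_* values and busy_flush_decision() are local stand-ins for illustration, not the kernel's definitions.

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

#define FLAG_TRYFLUSH	(1u << 0)	/* stand-in for XFS_ALLOC_FLAG_TRYFLUSH */
#define FLAG_FREEING	(1u << 1)	/* stand-in for XFS_ALLOC_FLAG_FREEING */

/*
 * Mirror the branch order of the kernel function: returns 1 if the caller
 * may proceed to the blocking wait, 0 if it should retry immediately, and
 * -EAGAIN if it must commit its own busy extents first.
 */
static int busy_flush_decision(bool tp_busy, unsigned flags,
			       unsigned busy_gen, unsigned cur_gen)
{
	if (!tp_busy)
		return 1;		/* nothing we hold can deadlock us */
	if (flags & FLAG_TRYFLUSH)
		return 0;		/* opportunistic flush: never wait */
	if (busy_gen != cur_gen)
		return 0;		/* progress since the snapshot: retry */
	if (flags & FLAG_FREEING)
		return -EAGAIN;		/* we may hold the only free space */
	return 1;			/* safe to wait on others' extents */
}

int main(void)
{
	/* no uncommitted busy extents: always safe to block */
	assert(busy_flush_decision(false, FLAG_FREEING, 5, 5) == 1);
	/* try-flush callers never block while holding busy extents */
	assert(busy_flush_decision(true, FLAG_TRYFLUSH, 5, 5) == 0);
	/* the generation moved on: retry without waiting */
	assert(busy_flush_decision(true, 0, 5, 6) == 0);
	/* freeing while holding busy extents risks self-deadlock */
	assert(busy_flush_decision(true, FLAG_FREEING, 5, 5) == -EAGAIN);
	return 0;
}

Returning -EAGAIN only in the freeing case keeps ordinary allocations blocking exactly as before; only the extent-free path, which can hold the sole copy of the free space in its own transaction, is forced to unwind, commit, and retry.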