diff options
Diffstat (limited to 'fs/xfs/xfs_trans.c')
| -rw-r--r-- | fs/xfs/xfs_trans.c | 833 |
1 files changed, 524 insertions, 309 deletions
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 59e2f9031b9f..474f5a04ec63 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -24,6 +24,9 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_icache.h" +#include "xfs_rtbitmap.h" +#include "xfs_rtgroup.h" +#include "xfs_sb.h" struct kmem_cache *xfs_trans_cache; @@ -32,7 +35,6 @@ static void xfs_trans_trace_reservations( struct xfs_mount *mp) { - struct xfs_trans_res resv; struct xfs_trans_res *res; struct xfs_trans_res *end_res; int i; @@ -41,8 +43,6 @@ xfs_trans_trace_reservations( end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); for (i = 0; res < end_res; i++, res++) trace_xfs_trans_resv_calc(mp, i, res); - xfs_log_get_max_trans_res(mp, &resv); - trace_xfs_trans_resv_calc(mp, -1, &resv); } #else # define xfs_trans_trace_reservations(mp) @@ -69,7 +69,7 @@ xfs_trans_free( struct xfs_trans *tp) { xfs_extent_busy_sort(&tp->t_busy); - xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); + xfs_extent_busy_clear(&tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); xfs_trans_clear_context(tp); @@ -100,12 +100,11 @@ xfs_trans_dup( /* * Initialize the new transaction structure. */ - ntp->t_magic = XFS_TRANS_HEADER_MAGIC; ntp->t_mountp = tp->t_mountp; INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); INIT_LIST_HEAD(&ntp->t_dfops); - ntp->t_firstblock = NULLFSBLOCK; + ntp->t_highest_agno = NULLAGNUMBER; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); @@ -135,18 +134,14 @@ xfs_trans_dup( } /* - * This is called to reserve free disk blocks and log space for the - * given transaction. This must be done before allocating any resources - * within the transaction. + * This is called to reserve free disk blocks and log space for the given + * transaction before allocating any resources within the transaction. * * This will return ENOSPC if there are not enough blocks available. * It will sleep waiting for available log space. - * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which - * is used by long running transactions. If any one of the reservations - * fails then they will all be backed out. * - * This does not do quota reservations. That typically is done by the - * caller afterwards. + * This does not do quota reservations. That typically is done by the caller + * afterwards. */ static int xfs_trans_reserve( @@ -159,13 +154,15 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + ASSERT(resp->tr_logres > 0); + /* - * Attempt to reserve the needed disk blocks by decrementing - * the number needed from the number available. This will - * fail if the count would go below zero. + * Attempt to reserve the needed disk blocks by decrementing the number + * needed from the number available. This will fail if the count would + * go below zero. */ if (blocks > 0) { - error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); + error = xfs_dec_fdblocks(mp, blocks, rsvd); if (error != 0) return -ENOSPC; tp->t_blk_res += blocks; @@ -174,47 +171,23 @@ xfs_trans_reserve( /* * Reserve the log space needed for this transaction. */ - if (resp->tr_logres > 0) { - bool permanent = false; - - ASSERT(tp->t_log_res == 0 || - tp->t_log_res == resp->tr_logres); - ASSERT(tp->t_log_count == 0 || - tp->t_log_count == resp->tr_logcount); - - if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) { - tp->t_flags |= XFS_TRANS_PERM_LOG_RES; - permanent = true; - } else { - ASSERT(tp->t_ticket == NULL); - ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - } - - if (tp->t_ticket != NULL) { - ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES); - error = xfs_log_regrant(mp, tp->t_ticket); - } else { - error = xfs_log_reserve(mp, - resp->tr_logres, - resp->tr_logcount, - &tp->t_ticket, XFS_TRANSACTION, - permanent); - } - - if (error) - goto undo_blocks; + if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) + tp->t_flags |= XFS_TRANS_PERM_LOG_RES; + error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount, + &tp->t_ticket, (tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (error) + goto undo_blocks; - tp->t_log_res = resp->tr_logres; - tp->t_log_count = resp->tr_logcount; - } + tp->t_log_res = resp->tr_logres; + tp->t_log_count = resp->tr_logcount; /* - * Attempt to reserve the needed realtime extents by decrementing - * the number needed from the number available. This will - * fail if the count would go below zero. + * Attempt to reserve the needed realtime extents by decrementing the + * number needed from the number available. This will fail if the + * count would go below zero. */ if (rtextents > 0) { - error = xfs_mod_frextents(mp, -((int64_t)rtextents)); + error = xfs_dec_frextents(mp, rtextents); if (error) { error = -ENOSPC; goto undo_log; @@ -224,26 +197,41 @@ xfs_trans_reserve( return 0; - /* - * Error cases jump to one of these labels to undo any - * reservations which have already been performed. - */ undo_log: - if (resp->tr_logres > 0) { - xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); - tp->t_ticket = NULL; - tp->t_log_res = 0; - tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; - } - + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); + tp->t_ticket = NULL; + tp->t_log_res = 0; + tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; undo_blocks: if (blocks > 0) { - xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); + xfs_add_fdblocks(mp, blocks); tp->t_blk_res = 0; } return error; } +static struct xfs_trans * +__xfs_trans_alloc( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_trans *tp; + + ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp)); + + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); + if (!(flags & XFS_TRANS_NO_WRITECOUNT)) + sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp); + tp->t_flags = flags; + tp->t_mountp = mp; + INIT_LIST_HEAD(&tp->t_items); + INIT_LIST_HEAD(&tp->t_busy); + INIT_LIST_HEAD(&tp->t_dfops); + tp->t_highest_agno = NULLAGNUMBER; + return tp; +} + int xfs_trans_alloc( struct xfs_mount *mp, @@ -257,34 +245,16 @@ xfs_trans_alloc( bool want_retry = true; int error; + ASSERT(resp->tr_logres > 0); + /* * Allocate the handle before we do our freeze accounting and setting up * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); - if (!(flags & XFS_TRANS_NO_WRITECOUNT)) - sb_start_intwrite(mp->m_super); - xfs_trans_set_context(tp); - - /* - * Zero-reservation ("empty") transactions can't modify anything, so - * they're allowed to run while we're frozen. - */ - WARN_ON(resp->tr_logres > 0 && - mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); - ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || - xfs_has_lazysbcount(mp)); - - tp->t_magic = XFS_TRANS_HEADER_MAGIC; - tp->t_flags = flags; - tp->t_mountp = mp; - INIT_LIST_HEAD(&tp->t_items); - INIT_LIST_HEAD(&tp->t_busy); - INIT_LIST_HEAD(&tp->t_dfops); - tp->t_firstblock = NULLFSBLOCK; - + tp = __xfs_trans_alloc(mp, flags); + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); @@ -295,7 +265,9 @@ retry: * Do not perform a synchronous scan because callers can hold * other locks. */ - xfs_blockgc_flush_all(mp); + error = xfs_blockgc_flush_all(mp); + if (error) + return error; want_retry = false; goto retry; } @@ -326,14 +298,11 @@ retry: * where we can be grabbing buffers at the same time that freeze is trying to * drain the buffer LRU list. */ -int +struct xfs_trans * xfs_trans_alloc_empty( - struct xfs_mount *mp, - struct xfs_trans **tpp) + struct xfs_mount *mp) { - struct xfs_trans_res resv = {0}; - - return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); + return __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT); } /* @@ -422,6 +391,8 @@ xfs_trans_mod_sb( ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res); } tp->t_frextents_delta += delta; + if (xfs_has_rtgroups(mp)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_RES_FREXTENTS: /* @@ -431,6 +402,8 @@ xfs_trans_mod_sb( */ ASSERT(delta < 0); tp->t_res_frextents_delta += delta; + if (xfs_has_rtgroups(mp)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_DBLOCKS: tp->t_dblocks_delta += delta; @@ -457,6 +430,10 @@ xfs_trans_mod_sb( case XFS_TRANS_SB_REXTSLOG: tp->t_rextslog_delta += delta; break; + case XFS_TRANS_SB_RGCOUNT: + ASSERT(delta > 0); + tp->t_rgcount_delta += delta; + break; default: ASSERT(0); return; @@ -475,19 +452,17 @@ xfs_trans_mod_sb( */ STATIC void xfs_trans_apply_sb_deltas( - xfs_trans_t *tp) + struct xfs_trans *tp) { - struct xfs_dsb *sbp; - struct xfs_buf *bp; - int whole = 0; - - bp = xfs_trans_getsb(tp); - sbp = bp->b_addr; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *bp = xfs_trans_getsb(tp); + struct xfs_dsb *sbp = bp->b_addr; + int whole = 0; /* * Only update the superblock counters if we are logging them */ - if (!xfs_has_lazysbcount((tp->t_mountp))) { + if (!xfs_has_lazysbcount(mp)) { if (tp->t_icount_delta) be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); if (tp->t_ifree_delta) @@ -498,13 +473,37 @@ xfs_trans_apply_sb_deltas( be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); } - if (tp->t_frextents_delta) - be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta); - if (tp->t_res_frextents_delta) - be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta); + /* + * sb_frextents was added to the lazy sb counters when the rt groups + * feature was introduced. This is possible because we know that all + * kernels supporting rtgroups will also recompute frextents from the + * realtime bitmap. + * + * For older file systems, updating frextents requires careful handling + * because we cannot rely on log recovery in older kernels to recompute + * the value from the rtbitmap. This means that the ondisk frextents + * must be consistent with the rtbitmap. + * + * Therefore, log the frextents change to the ondisk superblock and + * update the incore superblock so that future calls to xfs_log_sb + * write the correct value ondisk. + */ + if ((tp->t_frextents_delta || tp->t_res_frextents_delta) && + !xfs_has_rtgroups(mp)) { + int64_t rtxdelta; + + rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta; + + spin_lock(&mp->m_sb_lock); + be64_add_cpu(&sbp->sb_frextents, rtxdelta); + mp->m_sb.sb_frextents += rtxdelta; + spin_unlock(&mp->m_sb_lock); + } if (tp->t_dblocks_delta) { be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); + mp->m_ddev_targp->bt_nr_sectors += + XFS_FSB_TO_BB(mp, tp->t_dblocks_delta); whole = 1; } if (tp->t_agcount_delta) { @@ -517,6 +516,18 @@ xfs_trans_apply_sb_deltas( } if (tp->t_rextsize_delta) { be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); + + /* + * Because the ondisk sb records rtgroup size in units of rt + * extents, any time we update the rt extent size we have to + * recompute the ondisk rtgroup block log. The incore values + * will be recomputed in xfs_trans_unreserve_and_mod_sb. + */ + if (xfs_has_rtgroups(mp)) { + sbp->sb_rgblklog = xfs_compute_rgblklog( + be32_to_cpu(sbp->sb_rgextents), + be32_to_cpu(sbp->sb_rextsize)); + } whole = 1; } if (tp->t_rbmblocks_delta) { @@ -525,6 +536,8 @@ xfs_trans_apply_sb_deltas( } if (tp->t_rblocks_delta) { be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); + mp->m_rtdev_targp->bt_nr_sectors += + XFS_FSB_TO_BB(mp, tp->t_rblocks_delta); whole = 1; } if (tp->t_rextents_delta) { @@ -535,6 +548,10 @@ xfs_trans_apply_sb_deltas( sbp->sb_rextslog += tp->t_rextslog_delta; whole = 1; } + if (tp->t_rgcount_delta) { + be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta); + whole = 1; + } xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) @@ -574,38 +591,44 @@ xfs_trans_unreserve_and_mod_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - int64_t blkdelta = 0; - int64_t rtxdelta = 0; + int64_t blkdelta = tp->t_blk_res; + int64_t rtxdelta = tp->t_rtx_res; int64_t idelta = 0; int64_t ifreedelta = 0; - int error; - /* calculate deltas */ - if (tp->t_blk_res > 0) - blkdelta = tp->t_blk_res; - if ((tp->t_fdblocks_delta != 0) && - (xfs_has_lazysbcount(mp) || - (tp->t_flags & XFS_TRANS_SB_DIRTY))) + /* + * Calculate the deltas. + * + * t_fdblocks_delta and t_frextents_delta can be positive or negative: + * + * - positive values indicate blocks freed in the transaction. + * - negative values indicate blocks allocated in the transaction + * + * Negative values can only happen if the transaction has a block + * reservation that covers the allocated block. The end result is + * that the calculated delta values must always be positive and we + * can only put back previous allocated or reserved blocks here. + */ + ASSERT(tp->t_blk_res || tp->t_fdblocks_delta >= 0); + if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { blkdelta += tp->t_fdblocks_delta; + ASSERT(blkdelta >= 0); + } - if (tp->t_rtx_res > 0) - rtxdelta = tp->t_rtx_res; - if ((tp->t_frextents_delta != 0) && - (tp->t_flags & XFS_TRANS_SB_DIRTY)) + ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0); + if (xfs_has_rtgroups(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { rtxdelta += tp->t_frextents_delta; + ASSERT(rtxdelta >= 0); + } - if (xfs_has_lazysbcount(mp) || - (tp->t_flags & XFS_TRANS_SB_DIRTY)) { + if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { idelta = tp->t_icount_delta; ifreedelta = tp->t_ifree_delta; } /* apply the per-cpu counters */ - if (blkdelta) { - error = xfs_mod_fdblocks(mp, blkdelta, rsvd); - ASSERT(!error); - } + if (blkdelta) + xfs_add_fdblocks(mp, blkdelta); if (idelta) percpu_counter_add_batch(&mp->m_icount, idelta, @@ -614,7 +637,10 @@ xfs_trans_unreserve_and_mod_sb( if (ifreedelta) percpu_counter_add(&mp->m_ifree, ifreedelta); - if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY)) + if (rtxdelta) + xfs_add_frextents(mp, rtxdelta); + + if (!(tp->t_flags & XFS_TRANS_SB_DIRTY)) return; /* apply remaining deltas */ @@ -622,15 +648,22 @@ xfs_trans_unreserve_and_mod_sb( mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta; mp->m_sb.sb_icount += idelta; mp->m_sb.sb_ifree += ifreedelta; - mp->m_sb.sb_frextents += rtxdelta; + /* + * Do not touch sb_frextents here because it is handled in + * xfs_trans_apply_sb_deltas for file systems where it isn't a lazy + * counter anyway. + */ mp->m_sb.sb_dblocks += tp->t_dblocks_delta; mp->m_sb.sb_agcount += tp->t_agcount_delta; mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; - mp->m_sb.sb_rextsize += tp->t_rextsize_delta; + if (tp->t_rextsize_delta) + xfs_mount_sb_set_rextsize(mp, &mp->m_sb, + mp->m_sb.sb_rextsize + tp->t_rextsize_delta); mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; mp->m_sb.sb_rblocks += tp->t_rblocks_delta; mp->m_sb.sb_rextents += tp->t_rextents_delta; mp->m_sb.sb_rextslog += tp->t_rextslog_delta; + mp->m_sb.sb_rgcount += tp->t_rgcount_delta; spin_unlock(&mp->m_sb_lock); /* @@ -639,7 +672,6 @@ xfs_trans_unreserve_and_mod_sb( */ ASSERT(mp->m_sb.sb_imax_pct >= 0); ASSERT(mp->m_sb.sb_rextslog >= 0); - return; } /* Add the given log item to the transaction's list of log items. */ @@ -648,7 +680,7 @@ xfs_trans_add_item( struct xfs_trans *tp, struct xfs_log_item *lip) { - ASSERT(lip->li_mountp == tp->t_mountp); + ASSERT(lip->li_log == tp->t_mountp->m_log); ASSERT(lip->li_ailp == tp->t_mountp->m_ail); ASSERT(list_empty(&lip->li_trans)); ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags)); @@ -682,140 +714,97 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } } -static inline void -xfs_log_item_batch_insert( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - struct xfs_log_item **log_items, - int nr_items, - xfs_lsn_t commit_lsn) +/* + * Sort transaction items prior to running precommit operations. This will + * attempt to order the items such that they will always be locked in the same + * order. Items that have no sort function are moved to the end of the list + * and so are locked last. + * + * This may need refinement as different types of objects add sort functions. + * + * Function is more complex than it needs to be because we are comparing 64 bit + * values and the function only returns 32 bit values. + */ +static int +xfs_trans_precommit_sort( + void *unused_arg, + const struct list_head *a, + const struct list_head *b) { - int i; - - spin_lock(&ailp->ail_lock); - /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ - xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); + struct xfs_log_item *lia = container_of(a, + struct xfs_log_item, li_trans); + struct xfs_log_item *lib = container_of(b, + struct xfs_log_item, li_trans); + int64_t diff; - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 0); - } + /* + * If both items are non-sortable, leave them alone. If only one is + * sortable, move the non-sortable item towards the end of the list. + */ + if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort) + return 0; + if (!lia->li_ops->iop_sort) + return 1; + if (!lib->li_ops->iop_sort) + return -1; + + diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib); + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; } /* - * Bulk operation version of xfs_trans_committed that takes a log vector of - * items to insert into the AIL. This uses bulk AIL insertion techniques to - * minimise lock traffic. - * - * If we are called with the aborted flag set, it is because a log write during - * a CIL checkpoint commit has failed. In this case, all the items in the - * checkpoint have already gone through iop_committed and iop_committing, which - * means that checkpoint commit abort handling is treated exactly the same - * as an iclog write error even though we haven't started any IO yet. Hence in - * this case all we need to do is iop_committed processing, followed by an - * iop_unpin(aborted) call. + * Run transaction precommit functions. * - * The AIL cursor is used to optimise the insert process. If commit_lsn is not - * at the end of the AIL, the insert cursor avoids the need to walk - * the AIL to find the insertion point on every xfs_log_item_batch_insert() - * call. This saves a lot of needless list walking and is a net win, even - * though it slightly increases that amount of AIL lock traffic to set it up - * and tear it down. + * If there is an error in any of the callouts, then stop immediately and + * trigger a shutdown to abort the transaction. There is no recovery possible + * from errors at this point as the transaction is dirty.... */ -void -xfs_trans_committed_bulk( - struct xfs_ail *ailp, - struct xfs_log_vec *log_vector, - xfs_lsn_t commit_lsn, - bool aborted) +static int +xfs_trans_run_precommits( + struct xfs_trans *tp) { -#define LOG_ITEM_BATCH_SIZE 32 - struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; - struct xfs_log_vec *lv; - struct xfs_ail_cursor cur; - int i = 0; - - spin_lock(&ailp->ail_lock); - xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); - spin_unlock(&ailp->ail_lock); - - /* unpin all the log items */ - for (lv = log_vector; lv; lv = lv->lv_next ) { - struct xfs_log_item *lip = lv->lv_item; - xfs_lsn_t item_lsn; - - if (aborted) - set_bit(XFS_LI_ABORTED, &lip->li_flags); - - if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { - lip->li_ops->iop_release(lip); - continue; - } - - if (lip->li_ops->iop_committed) - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); - else - item_lsn = commit_lsn; - - /* item_lsn of -1 means the item needs no further processing */ - if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) - continue; - - /* - * if we are aborting the operation, no point in inserting the - * object into the AIL as we are in a shutdown situation. - */ - if (aborted) { - ASSERT(xfs_is_shutdown(ailp->ail_mount)); - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 1); - continue; - } + struct xfs_mount *mp = tp->t_mountp; + struct xfs_log_item *lip, *n; + int error = 0; - if (item_lsn != commit_lsn) { + /* + * Sort the item list to avoid ABBA deadlocks with other transactions + * running precommit operations that lock multiple shared items such as + * inode cluster buffers. + */ + list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort); - /* - * Not a bulk update option due to unusual item_lsn. - * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. This does - * not affect the AIL cursor the bulk insert path is - * using. - */ - spin_lock(&ailp->ail_lock); - if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) - xfs_trans_ail_update(ailp, lip, item_lsn); - else - spin_unlock(&ailp->ail_lock); - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 0); + /* + * Precommit operations can remove the log item from the transaction + * if the log item exists purely to delay modifications until they + * can be ordered against other operations. Hence we have to use + * list_for_each_entry_safe() here. + */ + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; - } - - /* Item is a candidate for bulk AIL insert. */ - log_items[i++] = lv->lv_item; - if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, &cur, log_items, - LOG_ITEM_BATCH_SIZE, commit_lsn); - i = 0; + if (lip->li_ops->iop_precommit) { + error = lip->li_ops->iop_precommit(tp, lip); + if (error) + break; } } - - /* make sure we insert the remainder! */ - if (i) - xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); - - spin_lock(&ailp->ail_lock); - xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->ail_lock); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; } /* @@ -836,6 +825,7 @@ __xfs_trans_commit( bool regrant) { struct xfs_mount *mp = tp->t_mountp; + struct xlog *log = mp->m_log; xfs_csn_t commit_seq = 0; int error = 0; int sync = tp->t_flags & XFS_TRANS_SYNC; @@ -843,16 +833,16 @@ __xfs_trans_commit( trace_xfs_trans_commit(tp, _RET_IP_); /* - * Finish deferred items on final commit. Only permanent transactions - * should ever have deferred ops. + * Commit per-transaction changes that are not already tracked through + * log items. This can add dirty log items to the transaction. */ - WARN_ON_ONCE(!list_empty(&tp->t_dfops) && - !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { - error = xfs_defer_finish_noroll(&tp); - if (error) - goto out_unreserve; - } + if (tp->t_flags & XFS_TRANS_SB_DIRTY) + xfs_trans_apply_sb_deltas(tp); + xfs_trans_apply_dquot_deltas(tp); + + error = xfs_trans_run_precommits(tp); + if (error) + goto out_unreserve; /* * If there is nothing to be logged by the transaction, @@ -864,21 +854,20 @@ __xfs_trans_commit( if (!(tp->t_flags & XFS_TRANS_DIRTY)) goto out_unreserve; - if (xfs_is_shutdown(mp)) { + /* + * We must check against log shutdown here because we cannot abort log + * items and leave them dirty, inconsistent and unpinned in memory while + * the log is active. This leaves them open to being written back to + * disk, and that will lead to on-disk corruption. + */ + if (xlog_is_shutdown(log)) { error = -EIO; goto out_unreserve; } ASSERT(tp->t_ticket != NULL); - /* - * If we need to update the superblock, then do it now. - */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) - xfs_trans_apply_sb_deltas(tp); - xfs_trans_apply_dquot_deltas(tp); - - xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant); + xlog_cil_commit(log, tp, &commit_seq, regrant); xfs_trans_free(tp); @@ -903,12 +892,12 @@ out_unreserve: * the dqinfo portion to be. All that means is that we have some * (non-persistent) quota reservations that need to be unreserved. */ - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, true); if (tp->t_ticket) { - if (regrant && !xlog_is_shutdown(mp->m_log)) - xfs_log_ticket_regrant(mp->m_log, tp->t_ticket); + if (regrant && !xlog_is_shutdown(log)) + xfs_log_ticket_regrant(log, tp->t_ticket); else - xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); + xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; } xfs_trans_free_items(tp, !!error); @@ -922,22 +911,45 @@ int xfs_trans_commit( struct xfs_trans *tp) { + /* + * Finish deferred items on final commit. Only permanent transactions + * should ever have deferred ops. + */ + WARN_ON_ONCE(!list_empty(&tp->t_dfops) && + !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) { + int error = xfs_defer_finish_noroll(&tp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + } + return __xfs_trans_commit(tp, false); } /* - * Unlock all of the transaction's items and free the transaction. - * The transaction must not have modified any of its items, because - * there is no way to restore them to their previous state. + * Unlock all of the transaction's items and free the transaction. If the + * transaction is dirty, we must shut down the filesystem because there is no + * way to restore them to their previous state. * - * If the transaction has made a log reservation, make sure to release - * it as well. + * If the transaction has made a log reservation, make sure to release it as + * well. + * + * This is a high level function (equivalent to xfs_trans_commit()) and so can + * be called after the transaction has effectively been aborted due to the mount + * being shut down. However, if the mount has not been shut down and the + * transaction is dirty we will shut the mount down and, in doing so, that + * guarantees that the log is shut down, too. Hence we don't need to be as + * careful with shutdown state and dirty items here as we need to be in + * xfs_trans_commit(). */ void xfs_trans_cancel( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; + struct xlog *log = mp->m_log; bool dirty = (tp->t_flags & XFS_TRANS_DIRTY); trace_xfs_trans_cancel(tp, _RET_IP_); @@ -945,26 +957,28 @@ xfs_trans_cancel( /* * It's never valid to cancel a transaction with deferred ops attached, * because the transaction is effectively dirty. Complain about this - * loudly before freeing the in-memory defer items. + * loudly before freeing the in-memory defer items and shutting down the + * filesystem. */ if (!list_empty(&tp->t_dfops)) { - ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); dirty = true; xfs_defer_cancel(tp); } /* - * See if the caller is relying on us to shut down the - * filesystem. This happens in paths where we detect - * corruption and decide to give up. + * See if the caller is relying on us to shut down the filesystem. We + * only want an error report if there isn't already a shutdown in + * progress, so we only need to check against the mount shutdown state + * here. */ if (dirty && !xfs_is_shutdown(mp)) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG - if (!dirty && !xfs_is_shutdown(mp)) { + /* Log items need to be consistent until the log is shut down. */ + if (!dirty && !xlog_is_shutdown(log)) { struct xfs_log_item *lip; list_for_each_entry(lip, &tp->t_items, li_trans) @@ -972,10 +986,10 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, false); if (tp->t_ticket) { - xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); + xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; } @@ -984,51 +998,57 @@ xfs_trans_cancel( } /* - * Roll from one trans in the sequence of PERMANENT transactions to - * the next: permanent transactions are only flushed out when - * committed with xfs_trans_commit(), but we still want as soon - * as possible to let chunks of it go to the log. So we commit the - * chunk we've been working on and get a new transaction to continue. + * Roll from one trans in the sequence of PERMANENT transactions to the next: + * permanent transactions are only flushed out when committed with + * xfs_trans_commit(), but we still want as soon as possible to let chunks of it + * go to the log. So we commit the chunk we've been working on and get a new + * transaction to continue. */ int xfs_trans_roll( struct xfs_trans **tpp) { - struct xfs_trans *trans = *tpp; - struct xfs_trans_res tres; + struct xfs_trans *tp = *tpp; + unsigned int log_res = tp->t_log_res; + unsigned int log_count = tp->t_log_count; int error; - trace_xfs_trans_roll(trans, _RET_IP_); + trace_xfs_trans_roll(tp, _RET_IP_); + + ASSERT(log_res > 0); /* * Copy the critical parameters from one trans to the next. */ - tres.tr_logres = trans->t_log_res; - tres.tr_logcount = trans->t_log_count; - - *tpp = xfs_trans_dup(trans); + *tpp = xfs_trans_dup(tp); /* * Commit the current transaction. - * If this commit failed, then it'd just unlock those items that - * are not marked ihold. That also means that a filesystem shutdown - * is in progress. The caller takes the responsibility to cancel - * the duplicate transaction that gets returned. + * + * If this commit failed, then it'd just unlock those items that are not + * marked ihold. That also means that a filesystem shutdown is in + * progress. The caller takes the responsibility to cancel the + * duplicate transaction that gets returned. */ - error = __xfs_trans_commit(trans, true); + error = __xfs_trans_commit(tp, true); if (error) return error; /* * Reserve space in the log for the next transaction. - * This also pushes items in the "AIL", the list of logged items, - * out to disk if they are taking up space at the tail of the log - * that we want to use. This requires that either nothing be locked - * across this call, or that anything that is locked be logged in - * the prior and the next transactions. + * + * This also pushes items in the AIL out to disk if they are taking up + * space at the tail of the log that we want to use. This requires that + * either nothing be locked across this call, or that anything that is + * locked be logged in the prior and the next transactions. */ - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - return xfs_trans_reserve(*tpp, &tres, 0, 0); + tp = *tpp; + error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); + if (error) + return error; + tp->t_log_res = log_res; + tp->t_log_count = log_count; + return 0; } /* @@ -1054,7 +1074,7 @@ xfs_trans_alloc_inode( retry: error = xfs_trans_alloc(mp, resv, dblocks, - rblocks / mp->m_sb.sb_rextsize, + xfs_extlen_to_rtxlen(mp, rblocks), force ? XFS_TRANS_RESERVE : 0, &tp); if (error) return error; @@ -1090,6 +1110,76 @@ out_cancel: } /* + * Try to reserve more blocks for a transaction. + * + * This is for callers that need to attach resources to a transaction, scan + * those resources to determine the space reservation requirements, and then + * modify the attached resources. In other words, online repair. This can + * fail due to ENOSPC, so the caller must be able to cancel the transaction + * without shutting down the fs. + */ +int +xfs_trans_reserve_more( + struct xfs_trans *tp, + unsigned int blocks, + unsigned int rtextents) +{ + bool rsvd = tp->t_flags & XFS_TRANS_RESERVE; + + if (blocks && xfs_dec_fdblocks(tp->t_mountp, blocks, rsvd)) + return -ENOSPC; + if (rtextents && xfs_dec_frextents(tp->t_mountp, rtextents)) { + if (blocks) + xfs_add_fdblocks(tp->t_mountp, blocks); + return -ENOSPC; + } + tp->t_blk_res += blocks; + tp->t_rtx_res += rtextents; + return 0; +} + +/* + * Try to reserve more blocks and file quota for a transaction. Same + * conditions of usage as xfs_trans_reserve_more. + */ +int +xfs_trans_reserve_more_inode( + struct xfs_trans *tp, + struct xfs_inode *ip, + unsigned int dblocks, + unsigned int rblocks, + bool force_quota) +{ + struct xfs_mount *mp = ip->i_mount; + unsigned int rtx = xfs_extlen_to_rtxlen(mp, rblocks); + int error; + + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + + error = xfs_trans_reserve_more(tp, dblocks, rtx); + if (error) + return error; + + if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) + return 0; + + if (tp->t_flags & XFS_TRANS_RESERVE) + force_quota = true; + + error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, + force_quota); + if (!error) + return 0; + + /* Quota failed, give back the new reservation. */ + xfs_add_fdblocks(mp, dblocks); + tp->t_blk_res -= dblocks; + xfs_add_frextents(mp, rtx); + tp->t_rtx_res -= rtx; + return error; +} + +/* * Allocate an transaction in preparation for inode creation by reserving quota * against the given dquots. Callers are not required to hold any inode locks. */ @@ -1162,6 +1252,9 @@ retry: xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (xfs_is_metadir_inode(ip)) + goto out; + error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ @@ -1179,11 +1272,26 @@ retry: gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL; pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL; if (udqp || gdqp || pdqp) { + xfs_filblks_t dblocks, rblocks; unsigned int qflags = XFS_QMOPT_RES_REGBLKS; + bool isrt = XFS_IS_REALTIME_INODE(ip); if (force) qflags |= XFS_QMOPT_FORCE_RES; + if (isrt) { + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); + if (error) + goto out_cancel; + } + + xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks); + + if (isrt) + rblocks += ip->i_delayed_blks; + else + dblocks += ip->i_delayed_blks; + /* * Reserve enough quota to handle blocks on disk and reserved * for a delayed allocation. We'll actually transfer the @@ -1191,8 +1299,7 @@ retry: * though that part is only semi-transactional. */ error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, - pdqp, ip->i_nblocks + ip->i_delayed_blks, - 1, qflags); + pdqp, dblocks, 1, qflags); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); @@ -1201,12 +1308,120 @@ retry: } if (error) goto out_cancel; + + /* Do the same for realtime. */ + qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES); + error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, + pdqp, rblocks, 0, qflags); + if ((error == -EDQUOT || error == -ENOSPC) && !retried) { + xfs_trans_cancel(tp); + xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); + retried = true; + goto retry; + } + if (error) + goto out_cancel; + } + +out: + *tpp = tp; + return 0; + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + +/* + * Allocate an transaction, lock and join the directory and child inodes to it, + * and reserve quota for a directory update. If there isn't sufficient space, + * @dblocks will be set to zero for a reservationless directory update and + * @nospace_error will be set to a negative errno describing the space + * constraint we hit. + * + * The caller must ensure that the on-disk dquots attached to this inode have + * already been allocated and initialized. The ILOCKs will be dropped when the + * transaction is committed or cancelled. + * + * Caller is responsible for unlocking the inodes manually upon return + */ +int +xfs_trans_alloc_dir( + struct xfs_inode *dp, + struct xfs_trans_res *resv, + struct xfs_inode *ip, + unsigned int *dblocks, + struct xfs_trans **tpp, + int *nospace_error) +{ + struct xfs_trans *tp; + struct xfs_mount *mp = ip->i_mount; + unsigned int resblks; + bool retried = false; + int error; + +retry: + *nospace_error = 0; + resblks = *dblocks; + error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); + if (error == -ENOSPC) { + *nospace_error = error; + resblks = 0; + error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); + } + if (error) + return error; + + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, dp, 0); + xfs_trans_ijoin(tp, ip, 0); + + error = xfs_qm_dqattach_locked(dp, false); + if (error) { + /* Caller should have allocated the dquots! */ + ASSERT(error != -ENOENT); + goto out_cancel; + } + + error = xfs_qm_dqattach_locked(ip, false); + if (error) { + /* Caller should have allocated the dquots! */ + ASSERT(error != -ENOENT); + goto out_cancel; + } + + if (resblks == 0) + goto done; + + error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); + if (error == -EDQUOT || error == -ENOSPC) { + if (!retried) { + xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_blockgc_free_quota(dp, 0); + retried = true; + goto retry; + } + + *nospace_error = error; + resblks = 0; + error = 0; } + if (error) + goto out_cancel; +done: *tpp = tp; + *dblocks = resblks; return 0; out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } |
