diff options
Diffstat (limited to 'fs/xfs/xfs_trans.c')
-rw-r--r-- | fs/xfs/xfs_trans.c | 362 |
1 files changed, 148 insertions, 214 deletions
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7350640059cc..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -25,6 +25,8 @@ #include "xfs_dquot.h" #include "xfs_icache.h" #include "xfs_rtbitmap.h" +#include "xfs_rtgroup.h" +#include "xfs_sb.h" struct kmem_cache *xfs_trans_cache; @@ -67,7 +69,7 @@ xfs_trans_free( struct xfs_trans *tp) { xfs_extent_busy_sort(&tp->t_busy); - xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); + xfs_extent_busy_clear(&tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); xfs_trans_clear_context(tp); @@ -98,7 +100,6 @@ xfs_trans_dup( /* * Initialize the new transaction structure. */ - ntp->t_magic = XFS_TRANS_HEADER_MAGIC; ntp->t_mountp = tp->t_mountp; INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); @@ -163,7 +164,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (blocks > 0) { - error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); + error = xfs_dec_fdblocks(mp, blocks, rsvd); if (error != 0) return -ENOSPC; tp->t_blk_res += blocks; @@ -210,7 +211,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (rtextents > 0) { - error = xfs_mod_frextents(mp, -((int64_t)rtextents)); + error = xfs_dec_frextents(mp, rtextents); if (error) { error = -ENOSPC; goto undo_log; @@ -234,7 +235,7 @@ undo_log: undo_blocks: if (blocks > 0) { - xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); + xfs_add_fdblocks(mp, blocks); tp->t_blk_res = 0; } return error; @@ -273,7 +274,6 @@ retry: ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp)); - tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; tp->t_mountp = mp; INIT_LIST_HEAD(&tp->t_items); @@ -420,6 +420,8 @@ xfs_trans_mod_sb( ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res); } tp->t_frextents_delta += delta; + if (xfs_has_rtgroups(mp)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_RES_FREXTENTS: /* @@ -429,6 +431,8 @@ xfs_trans_mod_sb( */ ASSERT(delta < 0); tp->t_res_frextents_delta += delta; + if (xfs_has_rtgroups(mp)) + flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_DBLOCKS: tp->t_dblocks_delta += delta; @@ -455,6 +459,10 @@ xfs_trans_mod_sb( case XFS_TRANS_SB_REXTSLOG: tp->t_rextslog_delta += delta; break; + case XFS_TRANS_SB_RGCOUNT: + ASSERT(delta > 0); + tp->t_rgcount_delta += delta; + break; default: ASSERT(0); return; @@ -497,20 +505,22 @@ xfs_trans_apply_sb_deltas( } /* - * Updating frextents requires careful handling because it does not - * behave like the lazysb counters because we cannot rely on log - * recovery in older kenels to recompute the value from the rtbitmap. - * This means that the ondisk frextents must be consistent with the - * rtbitmap. + * sb_frextents was added to the lazy sb counters when the rt groups + * feature was introduced. This is possible because we know that all + * kernels supporting rtgroups will also recompute frextents from the + * realtime bitmap. + * + * For older file systems, updating frextents requires careful handling + * because we cannot rely on log recovery in older kernels to recompute + * the value from the rtbitmap. This means that the ondisk frextents + * must be consistent with the rtbitmap. * * Therefore, log the frextents change to the ondisk superblock and * update the incore superblock so that future calls to xfs_log_sb * write the correct value ondisk. - * - * Don't touch m_frextents because it includes incore reservations, - * and those are handled by the unreserve function. */ - if (tp->t_frextents_delta || tp->t_res_frextents_delta) { + if ((tp->t_frextents_delta || tp->t_res_frextents_delta) && + !xfs_has_rtgroups(tp->t_mountp)) { struct xfs_mount *mp = tp->t_mountp; int64_t rtxdelta; @@ -536,6 +546,18 @@ xfs_trans_apply_sb_deltas( } if (tp->t_rextsize_delta) { be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); + + /* + * Because the ondisk sb records rtgroup size in units of rt + * extents, any time we update the rt extent size we have to + * recompute the ondisk rtgroup block log. The incore values + * will be recomputed in xfs_trans_unreserve_and_mod_sb. + */ + if (xfs_has_rtgroups(tp->t_mountp)) { + sbp->sb_rgblklog = xfs_compute_rgblklog( + be32_to_cpu(sbp->sb_rgextents), + be32_to_cpu(sbp->sb_rextsize)); + } whole = 1; } if (tp->t_rbmblocks_delta) { @@ -554,6 +576,10 @@ xfs_trans_apply_sb_deltas( sbp->sb_rextslog += tp->t_rextslog_delta; whole = 1; } + if (tp->t_rgcount_delta) { + be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta); + whole = 1; + } xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) @@ -593,38 +619,44 @@ xfs_trans_unreserve_and_mod_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - int64_t blkdelta = 0; - int64_t rtxdelta = 0; + int64_t blkdelta = tp->t_blk_res; + int64_t rtxdelta = tp->t_rtx_res; int64_t idelta = 0; int64_t ifreedelta = 0; - int error; - /* calculate deltas */ - if (tp->t_blk_res > 0) - blkdelta = tp->t_blk_res; - if ((tp->t_fdblocks_delta != 0) && - (xfs_has_lazysbcount(mp) || - (tp->t_flags & XFS_TRANS_SB_DIRTY))) + /* + * Calculate the deltas. + * + * t_fdblocks_delta and t_frextents_delta can be positive or negative: + * + * - positive values indicate blocks freed in the transaction. + * - negative values indicate blocks allocated in the transaction + * + * Negative values can only happen if the transaction has a block + * reservation that covers the allocated block. The end result is + * that the calculated delta values must always be positive and we + * can only put back previous allocated or reserved blocks here. + */ + ASSERT(tp->t_blk_res || tp->t_fdblocks_delta >= 0); + if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { blkdelta += tp->t_fdblocks_delta; + ASSERT(blkdelta >= 0); + } - if (tp->t_rtx_res > 0) - rtxdelta = tp->t_rtx_res; - if ((tp->t_frextents_delta != 0) && - (tp->t_flags & XFS_TRANS_SB_DIRTY)) + ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0); + if (xfs_has_rtgroups(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { rtxdelta += tp->t_frextents_delta; + ASSERT(rtxdelta >= 0); + } - if (xfs_has_lazysbcount(mp) || - (tp->t_flags & XFS_TRANS_SB_DIRTY)) { + if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { idelta = tp->t_icount_delta; ifreedelta = tp->t_ifree_delta; } /* apply the per-cpu counters */ - if (blkdelta) { - error = xfs_mod_fdblocks(mp, blkdelta, rsvd); - ASSERT(!error); - } + if (blkdelta) + xfs_add_fdblocks(mp, blkdelta); if (idelta) percpu_counter_add_batch(&mp->m_icount, idelta, @@ -633,10 +665,8 @@ xfs_trans_unreserve_and_mod_sb( if (ifreedelta) percpu_counter_add(&mp->m_ifree, ifreedelta); - if (rtxdelta) { - error = xfs_mod_frextents(mp, rtxdelta); - ASSERT(!error); - } + if (rtxdelta) + xfs_add_frextents(mp, rtxdelta); if (!(tp->t_flags & XFS_TRANS_SB_DIRTY)) return; @@ -647,23 +677,21 @@ xfs_trans_unreserve_and_mod_sb( mp->m_sb.sb_icount += idelta; mp->m_sb.sb_ifree += ifreedelta; /* - * Do not touch sb_frextents here because we are dealing with incore - * reservation. sb_frextents is not part of the lazy sb counters so it - * must be consistent with the ondisk rtbitmap and must never include - * incore reservations. + * Do not touch sb_frextents here because it is handled in + * xfs_trans_apply_sb_deltas for file systems where it isn't a lazy + * counter anyway. */ mp->m_sb.sb_dblocks += tp->t_dblocks_delta; mp->m_sb.sb_agcount += tp->t_agcount_delta; mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; - mp->m_sb.sb_rextsize += tp->t_rextsize_delta; - if (tp->t_rextsize_delta) { - mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize); - mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize); - } + if (tp->t_rextsize_delta) + xfs_mount_sb_set_rextsize(mp, &mp->m_sb, + mp->m_sb.sb_rextsize + tp->t_rextsize_delta); mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; mp->m_sb.sb_rblocks += tp->t_rblocks_delta; mp->m_sb.sb_rextents += tp->t_rextents_delta; mp->m_sb.sb_rextslog += tp->t_rextslog_delta; + mp->m_sb.sb_rgcount += tp->t_rgcount_delta; spin_unlock(&mp->m_sb_lock); /* @@ -672,7 +700,6 @@ xfs_trans_unreserve_and_mod_sb( */ ASSERT(mp->m_sb.sb_imax_pct >= 0); ASSERT(mp->m_sb.sb_rextslog >= 0); - return; } /* Add the given log item to the transaction's list of log items. */ @@ -715,142 +742,15 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } } -static inline void -xfs_log_item_batch_insert( - struct xfs_ail *ailp, - struct xfs_ail_cursor *cur, - struct xfs_log_item **log_items, - int nr_items, - xfs_lsn_t commit_lsn) -{ - int i; - - spin_lock(&ailp->ail_lock); - /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ - xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); - - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 0); - } -} - -/* - * Bulk operation version of xfs_trans_committed that takes a log vector of - * items to insert into the AIL. This uses bulk AIL insertion techniques to - * minimise lock traffic. - * - * If we are called with the aborted flag set, it is because a log write during - * a CIL checkpoint commit has failed. In this case, all the items in the - * checkpoint have already gone through iop_committed and iop_committing, which - * means that checkpoint commit abort handling is treated exactly the same - * as an iclog write error even though we haven't started any IO yet. Hence in - * this case all we need to do is iop_committed processing, followed by an - * iop_unpin(aborted) call. - * - * The AIL cursor is used to optimise the insert process. If commit_lsn is not - * at the end of the AIL, the insert cursor avoids the need to walk - * the AIL to find the insertion point on every xfs_log_item_batch_insert() - * call. This saves a lot of needless list walking and is a net win, even - * though it slightly increases that amount of AIL lock traffic to set it up - * and tear it down. - */ -void -xfs_trans_committed_bulk( - struct xfs_ail *ailp, - struct list_head *lv_chain, - xfs_lsn_t commit_lsn, - bool aborted) -{ -#define LOG_ITEM_BATCH_SIZE 32 - struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; - struct xfs_log_vec *lv; - struct xfs_ail_cursor cur; - int i = 0; - - spin_lock(&ailp->ail_lock); - xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); - spin_unlock(&ailp->ail_lock); - - /* unpin all the log items */ - list_for_each_entry(lv, lv_chain, lv_list) { - struct xfs_log_item *lip = lv->lv_item; - xfs_lsn_t item_lsn; - - if (aborted) - set_bit(XFS_LI_ABORTED, &lip->li_flags); - - if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { - lip->li_ops->iop_release(lip); - continue; - } - - if (lip->li_ops->iop_committed) - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); - else - item_lsn = commit_lsn; - - /* item_lsn of -1 means the item needs no further processing */ - if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) - continue; - - /* - * if we are aborting the operation, no point in inserting the - * object into the AIL as we are in a shutdown situation. - */ - if (aborted) { - ASSERT(xlog_is_shutdown(ailp->ail_log)); - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 1); - continue; - } - - if (item_lsn != commit_lsn) { - - /* - * Not a bulk update option due to unusual item_lsn. - * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. This does - * not affect the AIL cursor the bulk insert path is - * using. - */ - spin_lock(&ailp->ail_lock); - if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) - xfs_trans_ail_update(ailp, lip, item_lsn); - else - spin_unlock(&ailp->ail_lock); - if (lip->li_ops->iop_unpin) - lip->li_ops->iop_unpin(lip, 0); - continue; - } - - /* Item is a candidate for bulk AIL insert. */ - log_items[i++] = lv->lv_item; - if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, &cur, log_items, - LOG_ITEM_BATCH_SIZE, commit_lsn); - i = 0; - } - } - - /* make sure we insert the remainder! */ - if (i) - xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); - - spin_lock(&ailp->ail_lock); - xfs_trans_ail_cursor_done(&cur); - spin_unlock(&ailp->ail_lock); -} - /* * Sort transaction items prior to running precommit operations. This will * attempt to order the items such that they will always be locked in the same @@ -960,29 +860,17 @@ __xfs_trans_commit( trace_xfs_trans_commit(tp, _RET_IP_); - error = xfs_trans_run_precommits(tp); - if (error) { - if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) - xfs_defer_cancel(tp); - goto out_unreserve; - } - /* - * Finish deferred items on final commit. Only permanent transactions - * should ever have deferred ops. + * Commit per-transaction changes that are not already tracked through + * log items. This can add dirty log items to the transaction. */ - WARN_ON_ONCE(!list_empty(&tp->t_dfops) && - !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { - error = xfs_defer_finish_noroll(&tp); - if (error) - goto out_unreserve; + if (tp->t_flags & XFS_TRANS_SB_DIRTY) + xfs_trans_apply_sb_deltas(tp); + xfs_trans_apply_dquot_deltas(tp); - /* Run precommits from final tx in defer chain. */ - error = xfs_trans_run_precommits(tp); - if (error) - goto out_unreserve; - } + error = xfs_trans_run_precommits(tp); + if (error) + goto out_unreserve; /* * If there is nothing to be logged by the transaction, @@ -1007,13 +895,6 @@ __xfs_trans_commit( ASSERT(tp->t_ticket != NULL); - /* - * If we need to update the superblock, then do it now. - */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) - xfs_trans_apply_sb_deltas(tp); - xfs_trans_apply_dquot_deltas(tp); - xlog_cil_commit(log, tp, &commit_seq, regrant); xfs_trans_free(tp); @@ -1039,7 +920,7 @@ out_unreserve: * the dqinfo portion to be. All that means is that we have some * (non-persistent) quota reservations that need to be unreserved. */ - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, true); if (tp->t_ticket) { if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); @@ -1058,6 +939,20 @@ int xfs_trans_commit( struct xfs_trans *tp) { + /* + * Finish deferred items on final commit. Only permanent transactions + * should ever have deferred ops. + */ + WARN_ON_ONCE(!list_empty(&tp->t_dfops) && + !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) { + int error = xfs_defer_finish_noroll(&tp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + } + return __xfs_trans_commit(tp, false); } @@ -1119,7 +1014,7 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, false); if (tp->t_ticket) { xfs_log_ticket_ungrant(log, tp->t_ticket); @@ -1291,9 +1186,9 @@ xfs_trans_reserve_more_inode( return 0; /* Quota failed, give back the new reservation. */ - xfs_mod_fdblocks(mp, dblocks, tp->t_flags & XFS_TRANS_RESERVE); + xfs_add_fdblocks(mp, dblocks); tp->t_blk_res -= dblocks; - xfs_mod_frextents(mp, rtx); + xfs_add_frextents(mp, rtx); tp->t_rtx_res -= rtx; return error; } @@ -1371,6 +1266,9 @@ retry: xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (xfs_is_metadir_inode(ip)) + goto out; + error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ @@ -1388,11 +1286,26 @@ retry: gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL; pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL; if (udqp || gdqp || pdqp) { + xfs_filblks_t dblocks, rblocks; unsigned int qflags = XFS_QMOPT_RES_REGBLKS; + bool isrt = XFS_IS_REALTIME_INODE(ip); if (force) qflags |= XFS_QMOPT_FORCE_RES; + if (isrt) { + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); + if (error) + goto out_cancel; + } + + xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks); + + if (isrt) + rblocks += ip->i_delayed_blks; + else + dblocks += ip->i_delayed_blks; + /* * Reserve enough quota to handle blocks on disk and reserved * for a delayed allocation. We'll actually transfer the @@ -1400,8 +1313,20 @@ retry: * though that part is only semi-transactional. */ error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, - pdqp, ip->i_nblocks + ip->i_delayed_blks, - 1, qflags); + pdqp, dblocks, 1, qflags); + if ((error == -EDQUOT || error == -ENOSPC) && !retried) { + xfs_trans_cancel(tp); + xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); + retried = true; + goto retry; + } + if (error) + goto out_cancel; + + /* Do the same for realtime. */ + qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES); + error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, + pdqp, rblocks, 0, qflags); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); @@ -1412,6 +1337,7 @@ retry: goto out_cancel; } +out: *tpp = tp; return 0; @@ -1430,6 +1356,8 @@ out_cancel: * The caller must ensure that the on-disk dquots attached to this inode have * already been allocated and initialized. The ILOCKs will be dropped when the * transaction is committed or cancelled. + * + * Caller is responsible for unlocking the inodes manually upon return */ int xfs_trans_alloc_dir( @@ -1460,8 +1388,8 @@ retry: xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dp, 0); + xfs_trans_ijoin(tp, ip, 0); error = xfs_qm_dqattach_locked(dp, false); if (error) { @@ -1484,6 +1412,9 @@ retry: if (error == -EDQUOT || error == -ENOSPC) { if (!retried) { xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_blockgc_free_quota(dp, 0); retried = true; goto retry; @@ -1503,5 +1434,8 @@ done: out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } |