summary | refs | log | tree | commit | diff
path: root/fs/xfs/xfs_bmap_item.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_bmap_item.c')
-rw-r--r-- fs/xfs/xfs_bmap_item.c 803
1 files changed, 500 insertions, 303 deletions
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 88073910fa5d..80f0c4bcc483 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -1,21 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
- *
* Author: Darrick J. Wong <darrick.wong@oracle.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "xfs.h"
#include "xfs_fs.h"
@@ -23,36 +9,62 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_bmap_item.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
-#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_ag.h"
+#include "xfs_trace.h"
+struct kmem_cache *xfs_bui_cache;
+struct kmem_cache *xfs_bud_cache;
-kmem_zone_t *xfs_bui_zone;
-kmem_zone_t *xfs_bud_zone;
+static const struct xfs_item_ops xfs_bui_item_ops;
/* Map an embedded xfs_log_item back to the xfs_bui_log_item that contains it. */
static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_bui_log_item, bui_item);
}
-void
+STATIC void
xfs_bui_item_free(
struct xfs_bui_log_item *buip)
{
- kmem_zone_free(xfs_bui_zone, buip);
+ kvfree(buip->bui_item.li_lv_shadow);
+ kmem_cache_free(xfs_bui_cache, buip);
+}
+
+/*
+ * Drop one reference to a BUI and free it when the last reference goes away.
+ *
+ * Freeing the BUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the BUI may not yet have been placed in the AIL
+ * when called by xfs_bui_release() from BUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the BUI.
+ *
+ * Callers that do not drop the final reference return without touching the
+ * item any further.
+ */
+STATIC void
+xfs_bui_release(
+ struct xfs_bui_log_item *buip)
+{
+ ASSERT(atomic_read(&buip->bui_refcount) > 0);
+ if (!atomic_dec_and_test(&buip->bui_refcount))
+ return; /* somebody else still holds a reference */
+
+ xfs_trans_ail_delete(&buip->bui_item, 0); /* last reference: take it off the AIL */
+ xfs_bui_item_free(buip);
}
+
STATIC void
xfs_bui_item_size(
struct xfs_log_item *lip,
@@ -65,6 +77,11 @@ xfs_bui_item_size(
*nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents);
}
+unsigned int xfs_bui_log_space(unsigned int nr)
+{ /* Log space for a BUI whose format structure is sized for nr extents. */
+ return xlog_item_space(1, xfs_bui_log_format_sizeof(nr)); /* NOTE(review): the 1 is presumably the iovec count - confirm against xlog_item_space() */
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given bui log item. We use only 1 iovec, and we point that
@@ -91,15 +108,6 @@ xfs_bui_item_format(
}
/*
- * Pinning has no meaning for an bui item, so just return.
- */
-STATIC void
-xfs_bui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place a BUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the BUI transaction has been successfully committed to make it
@@ -118,84 +126,28 @@ xfs_bui_item_unpin(
}
/*
- * BUI items have no locking or pushing. However, since BUIs are pulled from
- * the AIL when their corresponding BUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the BUI out of
- * the AIL.
- */
-STATIC uint
-xfs_bui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The BUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, a BUD isn't going to be
* constructed and thus we free the BUI here directly.
*/
STATIC void
-xfs_bui_item_unlock(
+xfs_bui_item_release(
struct xfs_log_item *lip)
{
- if (lip->li_flags & XFS_LI_ABORTED)
- xfs_bui_item_free(BUI_ITEM(lip));
+ xfs_bui_release(BUI_ITEM(lip));
}
/*
- * The BUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_bui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The BUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_bui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all bui log items.
- */
-static const struct xfs_item_ops xfs_bui_item_ops = {
- .iop_size = xfs_bui_item_size,
- .iop_format = xfs_bui_item_format,
- .iop_pin = xfs_bui_item_pin,
- .iop_unpin = xfs_bui_item_unpin,
- .iop_unlock = xfs_bui_item_unlock,
- .iop_committed = xfs_bui_item_committed,
- .iop_push = xfs_bui_item_push,
- .iop_committing = xfs_bui_item_committing,
-};
-
-/*
* Allocate and initialize a BUI item sized for XFS_BUI_MAX_FAST_EXTENTS extents.
*/
-struct xfs_bui_log_item *
+STATIC struct xfs_bui_log_item *
xfs_bui_init(
struct xfs_mount *mp)
{
struct xfs_bui_log_item *buip;
- buip = kmem_zone_zalloc(xfs_bui_zone, KM_SLEEP);
+ buip = kmem_cache_zalloc(xfs_bui_cache, GFP_KERNEL | __GFP_NOFAIL);
xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops);
buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS;
@@ -206,24 +158,6 @@ xfs_bui_init(
return buip;
}
-/*
- * Freeing the BUI requires that we remove it from the AIL if it has already
- * been placed there. However, the BUI may not yet have been placed in the AIL
- * when called by xfs_bui_release() from BUD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the BUI.
- */
-void
-xfs_bui_release(
- struct xfs_bui_log_item *buip)
-{
- ASSERT(atomic_read(&buip->bui_refcount) > 0);
- if (atomic_dec_and_test(&buip->bui_refcount)) {
- xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
- xfs_bui_item_free(buip);
- }
-}
-
static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_bud_log_item, bud_item);
@@ -239,6 +173,11 @@ xfs_bud_item_size(
*nbytes += sizeof(struct xfs_bud_log_format);
}
+unsigned int xfs_bud_log_space(void)
+{ /* Log space for a BUD; its format structure has a fixed size. */
+ return xlog_item_space(1, sizeof(struct xfs_bud_log_format)); /* NOTE(review): the 1 is presumably the iovec count - confirm against xlog_item_space() */
+}
+
/*
* This is called to fill in the vector of log iovecs for the
* given bud log item. We use only 1 iovec, and we point that
@@ -262,264 +201,522 @@ xfs_bud_item_format(
}
/*
- * Pinning has no meaning for an bud item, so just return.
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
*/
STATIC void
-xfs_bud_item_pin(
+xfs_bud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ xfs_bui_release(budp->bud_buip);
+ kvfree(budp->bud_item.li_lv_shadow);
+ kmem_cache_free(xfs_bud_cache, budp);
}
-/*
- * Since pinning has no meaning for an bud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_bud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static struct xfs_log_item *
+xfs_bud_item_intent(
+ struct xfs_log_item *lip)
{ /* Return the log item embedded in the BUI that this BUD points at. */
+ return &BUD_ITEM(lip)->bud_buip->bui_item;
}
-/*
- * There isn't much you can do to push on an bud item. It is simply stuck
- * waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_bud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static const struct xfs_item_ops xfs_bud_item_ops = { /* ops vector shared by all BUD log items */
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
+ XFS_ITEM_INTENT_DONE,
+ .iop_size = xfs_bud_item_size,
+ .iop_format = xfs_bud_item_format,
+ .iop_release = xfs_bud_item_release, /* drops the BUI reference and frees the BUD */
+ .iop_intent = xfs_bud_item_intent, /* hands back the BUI's log item */
+};
+
+static inline struct xfs_bmap_intent *bi_entry(const struct list_head *e)
{
- return XFS_ITEM_PINNED;
+ return list_entry(e, struct xfs_bmap_intent, bi_list);
}
-/*
- * The BUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the BUI and free the
- * BUD.
- */
+/*
+ * Sort bmap intents by owner inode number.
+ *
+ * Comparison callback handed to list_sort(): returns a negative value when
+ * @a's owner sorts before @b's, zero when both intents belong to the same
+ * inode number, and a positive value otherwise. @priv is unused.
+ *
+ * The inode numbers are compared explicitly instead of being subtracted.
+ * Inode numbers can be 64 bits wide while the return type is int, so a
+ * narrowed difference could report the wrong sign, or zero for two distinct
+ * inodes.
+ */
+static int
+xfs_bmap_update_diff_items(
+ void *priv,
+ const struct list_head *a,
+ const struct list_head *b)
+{
+ struct xfs_bmap_intent *ba = bi_entry(a);
+ struct xfs_bmap_intent *bb = bi_entry(b);
+
+ if (ba->bi_owner->i_ino < bb->bi_owner->i_ino)
+ return -1;
+ return ba->bi_owner->i_ino > bb->bi_owner->i_ino;
+}
+
+/* Log bmap updates in the intent item. */
STATIC void
-xfs_bud_item_unlock(
- struct xfs_log_item *lip)
+xfs_bmap_update_log_item(
+ struct xfs_trans *tp,
+ struct xfs_bui_log_item *buip,
+ struct xfs_bmap_intent *bi)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ uint next_extent;
+ struct xfs_map_extent *map;
- if (lip->li_flags & XFS_LI_ABORTED) {
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
+ ASSERT(next_extent < buip->bui_format.bui_nextents);
+ map = &buip->bui_format.bui_extents[next_extent];
+ map->me_owner = bi->bi_owner->i_ino;
+ map->me_startblock = bi->bi_bmap.br_startblock;
+ map->me_startoff = bi->bi_bmap.br_startoff;
+ map->me_len = bi->bi_bmap.br_blockcount;
+
+ switch (bi->bi_type) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ map->me_flags = bi->bi_type;
+ break;
+ default:
+ ASSERT(0);
}
+ if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
+ map->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
+ if (bi->bi_whichfork == XFS_ATTR_FORK)
+ map->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
+ if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
+ map->me_flags |= XFS_BMAP_EXTENT_REALTIME;
}
-/*
- * When the bud item is committed to disk, all we need to do is delete our
- * reference to our partner bui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_bud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+static struct xfs_log_item *
+xfs_bmap_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_bui_log_item *buip = xfs_bui_init(mp);
+ struct xfs_bmap_intent *bi;
+
+ ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+
+ if (sort)
+ list_sort(mp, items, xfs_bmap_update_diff_items);
+ list_for_each_entry(bi, items, bi_list)
+ xfs_bmap_update_log_item(tp, buip, bi);
+ return &buip->bui_item;
+}
+
+/*
+ * Get a BUD so we can process all the deferred bmap updates.
+ *
+ * Allocates a zeroed BUD from xfs_bud_cache, initializes its log item with
+ * xfs_bud_item_ops, and ties it to the BUI behind @intent by recording both
+ * the BUI pointer and the BUI id in the BUD. Returns the BUD's log item.
+ * @count is not used here.
+ */
+static struct xfs_log_item *
+xfs_bmap_update_create_done(
+ struct xfs_trans *tp,
+ struct xfs_log_item *intent,
+ unsigned int count)
+{
+ struct xfs_bui_log_item *buip = BUI_ITEM(intent);
+ struct xfs_bud_log_item *budp;
+
+ budp = kmem_cache_zalloc(xfs_bud_cache, GFP_KERNEL | __GFP_NOFAIL);
+ xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
+ &xfs_bud_item_ops);
+ budp->bud_buip = buip; /* xfs_bud_item_release() drops a BUI reference through this pointer */
+ budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+
+ return &budp->bud_item;
+}
+
+/* Take a passive ref to the group containing the space we're mapping. */
+static inline void
+xfs_bmap_update_get_group(
+ struct xfs_mount *mp,
+ struct xfs_bmap_intent *bi)
+{
+ enum xfs_group_type type = XG_TYPE_AG;
+
+ if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
+ type = XG_TYPE_RTG;
/*
- * Drop the BUI reference regardless of whether the BUD has been
- * aborted. Once the BUD transaction is constructed, it is the sole
- * responsibility of the BUD to release the BUI (even if the BUI is
- * aborted due to log I/O error).
+ * Bump the intent count on behalf of the deferred rmap and refcount
+ * intent items that we can queue when we finish this bmap work.
+ * This new intent item will bump the intent count before the bmap
+ * intent drops the intent count, ensuring that the intent count
+ * remains nonzero across the transaction roll.
*/
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
+ bi->bi_group = xfs_group_intent_get(mp, bi->bi_bmap.br_startblock,
+ type);
+}
+
+/* Add this deferred BUI to the transaction. */
+void
+xfs_bmap_defer_add(
+ struct xfs_trans *tp,
+ struct xfs_bmap_intent *bi)
+{
+ xfs_bmap_update_get_group(tp->t_mountp, bi);
- return (xfs_lsn_t)-1;
+ /*
+ * Ensure the deferred mapping is pre-recorded in i_delayed_blks.
+ *
+ * Otherwise stat can report zero blocks for an inode that actually has
+ * data when the entire mapping is in the process of being overwritten
+ * using the out of place write path. This is undone in xfs_bmapi_remap
+ * after it has incremented di_nblocks for a successful operation.
+ */
+ if (bi->bi_type == XFS_BMAP_MAP)
+ bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount;
+
+ trace_xfs_bmap_defer(bi);
+ xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
}
-/*
- * The BUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Cancel a deferred bmap update. */
STATIC void
-xfs_bud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_bmap_update_cancel_item(
+ struct list_head *item)
{
+ struct xfs_bmap_intent *bi = bi_entry(item);
+
+ if (bi->bi_type == XFS_BMAP_MAP)
+ bi->bi_owner->i_delayed_blks -= bi->bi_bmap.br_blockcount;
+
+ xfs_group_intent_put(bi->bi_group);
+ kmem_cache_free(xfs_bmap_intent_cache, bi);
}
-/*
- * This is the ops vector shared by all bud log items.
- */
-static const struct xfs_item_ops xfs_bud_item_ops = {
- .iop_size = xfs_bud_item_size,
- .iop_format = xfs_bud_item_format,
- .iop_pin = xfs_bud_item_pin,
- .iop_unpin = xfs_bud_item_unpin,
- .iop_unlock = xfs_bud_item_unlock,
- .iop_committed = xfs_bud_item_committed,
- .iop_push = xfs_bud_item_push,
- .iop_committing = xfs_bud_item_committing,
-};
+/* Process a deferred bmap update. */
+STATIC int
+xfs_bmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_log_item *done,
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+{
+ struct xfs_bmap_intent *bi = bi_entry(item);
+ int error;
-/*
- * Allocate and initialize an bud item with the given number of extents.
- */
-struct xfs_bud_log_item *
-xfs_bud_init(
+ error = xfs_bmap_finish_one(tp, bi);
+ if (!error && bi->bi_bmap.br_blockcount > 0) {
+ ASSERT(bi->bi_type == XFS_BMAP_UNMAP);
+ return -EAGAIN;
+ }
+
+ xfs_bmap_update_cancel_item(item);
+ return error;
+}
+
+/*
+ * Abort a pending BUI: drop one reference on the BUI behind @intent via
+ * xfs_bui_release(), which frees the item if that was the last reference.
+ */
+STATIC void
+xfs_bmap_update_abort_intent(
+ struct xfs_log_item *intent)
+{
+ xfs_bui_release(BUI_ITEM(intent));
+}
+
+/*
+ * Is this recovered BUI ok?
+ *
+ * Returns true only if the BUI carries exactly XFS_BUI_MAX_FAST_EXTENTS
+ * mappings and the first mapping passes every check below: no flag bits
+ * outside XFS_BMAP_EXTENT_FLAGS, an operation type of MAP or UNMAP, an owner
+ * accepted by xfs_verify_ino(), a file range accepted by
+ * xfs_verify_fileext(), and a block range accepted by the realtime or the
+ * regular block verifier, chosen by the XFS_BMAP_EXTENT_REALTIME flag.
+ */
+static inline bool
+xfs_bui_validate(
struct xfs_mount *mp,
struct xfs_bui_log_item *buip)
+{
+ struct xfs_map_extent *map;
+
+ /* Only one mapping operation per BUI... */
+ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+ return false;
+ map = &buip->bui_format.bui_extents[0];
+
+ if (map->me_flags & ~XFS_BMAP_EXTENT_FLAGS)
+ return false; /* unknown flag bits set */
+
+ switch (map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ break;
+ default:
+ return false; /* not an operation type handled here */
+ }
+
+ if (!xfs_verify_ino(mp, map->me_owner))
+ return false;
+
+ if (!xfs_verify_fileext(mp, map->me_startoff, map->me_len))
+ return false;
+
+ if (map->me_flags & XFS_BMAP_EXTENT_REALTIME)
+ return xfs_verify_rtbext(mp, map->me_startblock, map->me_len);
+
+ return xfs_verify_fsbext(mp, map->me_startblock, map->me_len);
+}
+
+static inline struct xfs_bmap_intent *
+xfs_bui_recover_work(
+ struct xfs_mount *mp,
+ struct xfs_defer_pending *dfp,
+ struct xfs_inode **ipp,
+ struct xfs_map_extent *map)
{
- struct xfs_bud_log_item *budp;
+ struct xfs_bmap_intent *bi;
+ int error;
- budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
- budp->bud_buip = buip;
- budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+ error = xlog_recover_iget(mp, map->me_owner, ipp);
+ if (error)
+ return ERR_PTR(error);
- return budp;
+ bi = kmem_cache_zalloc(xfs_bmap_intent_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+ bi->bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bi->bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ bi->bi_bmap.br_startblock = map->me_startblock;
+ bi->bi_bmap.br_startoff = map->me_startoff;
+ bi->bi_bmap.br_blockcount = map->me_len;
+ bi->bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ bi->bi_owner = *ipp;
+ xfs_bmap_update_get_group(mp, bi);
+
+ /* see xfs_bmap_defer_add for details */
+ if (bi->bi_type == XFS_BMAP_MAP)
+ bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount;
+ xfs_defer_add_item(dfp, &bi->bi_list);
+ return bi;
}
/*
* Process a bmap update intent item that was recovered from the log.
* We need to update some inode's bmbt.
*/
-int
-xfs_bui_recover(
- struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+STATIC int
+xfs_bmap_recover_work(
+ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
{
- int error = 0;
- unsigned int bui_type;
- struct xfs_map_extent *bmap;
- xfs_fsblock_t startblock_fsb;
- xfs_fsblock_t inode_fsb;
- xfs_filblks_t count;
- bool op_ok;
- struct xfs_bud_log_item *budp;
- enum xfs_bmap_intent_type type;
- int whichfork;
- xfs_exntst_t state;
+ struct xfs_trans_res resv;
+ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_defer_ops dfops;
- struct xfs_bmbt_irec irec;
- xfs_fsblock_t firstfsb;
-
- ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
+ struct xfs_mount *mp = lip->li_log->l_mp;
+ struct xfs_map_extent *map;
+ struct xfs_bmap_intent *work;
+ int iext_delta;
+ int error = 0;
- /* Only one mapping operation per BUI... */
- if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
- xfs_bui_release(buip);
- return -EIO;
+ if (!xfs_bui_validate(mp, buip)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &buip->bui_format, sizeof(buip->bui_format));
+ return -EFSCORRUPTED;
}
- /*
- * First check the validity of the extent described by the
- * BUI. If anything is bad, then toss the BUI.
- */
- bmap = &buip->bui_format.bui_extents[0];
- startblock_fsb = XFS_BB_TO_FSB(mp,
- XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
- inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
- XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- op_ok = true;
- break;
- default:
- op_ok = false;
- break;
- }
- if (!op_ok || startblock_fsb == 0 ||
- bmap->me_len == 0 ||
- inode_fsb == 0 ||
- startblock_fsb >= mp->m_sb.sb_dblocks ||
- bmap->me_len >= mp->m_sb.sb_agblocks ||
- inode_fsb >= mp->m_sb.sb_dblocks ||
- (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the BUI from the AIL and
- * free the memory associated with it.
- */
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
- xfs_bui_release(buip);
- return -EIO;
- }
+ map = &buip->bui_format.bui_extents[0];
+ work = xfs_bui_recover_work(mp, dfp, &ip, map);
+ if (IS_ERR(work))
+ return PTR_ERR(work);
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ /* Allocate transaction and do the work. */
+ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
+ error = xfs_trans_alloc(mp, &resv,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
if (error)
- return error;
- budp = xfs_trans_get_bud(tp, buip);
-
- /* Grab the inode. */
- error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
- if (error)
- goto err_inode;
+ goto err_rele;
- if (VFS_I(ip)->i_nlink == 0)
- xfs_iflags_set(ip, XFS_IRECOVERY);
- xfs_defer_init(&dfops, &firstfsb);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
+ if (!!(map->me_flags & XFS_BMAP_EXTENT_REALTIME) !=
+ xfs_ifork_is_realtime(ip, work->bi_whichfork)) {
error = -EFSCORRUPTED;
- goto err_dfops;
+ goto err_cancel;
}
- xfs_trans_ijoin(tp, ip, 0);
- count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
- ip, whichfork, bmap->me_startoff,
- bmap->me_startblock, &count, state);
+ if (work->bi_type == XFS_BMAP_MAP)
+ iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT;
+ else
+ iext_delta = XFS_IEXT_PUNCH_HOLE_CNT;
+
+ error = xfs_iext_count_extend(tp, ip, work->bi_whichfork, iext_delta);
if (error)
- goto err_dfops;
-
- if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
- irec.br_startblock = bmap->me_startblock;
- irec.br_blockcount = count;
- irec.br_startoff = bmap->me_startoff;
- irec.br_state = state;
- error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
- if (error)
- goto err_dfops;
- }
+ goto err_cancel;
+
+ error = xlog_recover_finish_intent(tp, dfp);
+ if (error == -EFSCORRUPTED)
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &buip->bui_format, sizeof(buip->bui_format));
+ if (error)
+ goto err_cancel;
- /* Finish transaction, free inodes. */
- error = xfs_defer_finish(&tp, &dfops, NULL);
+ /*
+ * Commit transaction, which frees the transaction and saves the inode
+ * for later replay activities.
+ */
+ error = xfs_defer_ops_capture_and_commit(tp, capture_list);
if (error)
- goto err_dfops;
+ goto err_unlock;
- set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
- error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- IRELE(ip);
+ xfs_irele(ip);
+ return 0;
+err_cancel:
+ xfs_trans_cancel(tp);
+err_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
+ xfs_irele(ip);
return error;
+}
-err_dfops:
- xfs_defer_cancel(&dfops);
-err_inode:
- xfs_trans_cancel(tp);
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- IRELE(ip);
+/*
+ * Relog an intent item to push the log tail forward.
+ *
+ * Builds a fresh BUI with xfs_bui_init(), copies the extent records of the
+ * existing @intent into it, and sets bui_next_extent to the number of records
+ * copied. Returns the new BUI's log item. @done_item is not referenced in
+ * this function.
+ */
+static struct xfs_log_item *
+xfs_bmap_relog_intent(
+ struct xfs_trans *tp,
+ struct xfs_log_item *intent,
+ struct xfs_log_item *done_item)
+{
+ struct xfs_bui_log_item *buip;
+ struct xfs_map_extent *map;
+ unsigned int count;
+
+ count = BUI_ITEM(intent)->bui_format.bui_nextents;
+ map = BUI_ITEM(intent)->bui_format.bui_extents;
+
+ buip = xfs_bui_init(tp->t_mountp);
+ memcpy(buip->bui_format.bui_extents, map, count * sizeof(*map));
+ atomic_set(&buip->bui_next_extent, count); /* every copied slot is already in use */
+
+ return &buip->bui_item;
+}
+
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = { /* deferred-operation callbacks for bmap update intents */
+ .name = "bmap",
+ .max_items = XFS_BUI_MAX_FAST_EXTENTS, /* same count xfs_bui_validate() requires of a recovered BUI */
+ .create_intent = xfs_bmap_update_create_intent,
+ .abort_intent = xfs_bmap_update_abort_intent,
+ .create_done = xfs_bmap_update_create_done,
+ .finish_item = xfs_bmap_update_finish_item,
+ .cancel_item = xfs_bmap_update_cancel_item,
+ .recover_work = xfs_bmap_recover_work,
+ .relog_intent = xfs_bmap_relog_intent,
+};
+
+STATIC bool
+xfs_bui_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{ /* True when the BUI behind lip carries the given intent id. */
+ return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_bui_item_ops = { /* ops vector shared by all BUI log items */
+ .flags = XFS_ITEM_INTENT,
+ .iop_size = xfs_bui_item_size,
+ .iop_format = xfs_bui_item_format,
+ .iop_unpin = xfs_bui_item_unpin,
+ .iop_release = xfs_bui_item_release, /* drops one BUI reference */
+ .iop_match = xfs_bui_item_match, /* compares bui_id against an intent id */
+};
+
+static inline void
+xfs_bui_copy_format(
+ struct xfs_bui_log_format *dst,
+ const struct xfs_bui_log_format *src)
+{ /* Copy a BUI format structure: fixed header first, then src->bui_nextents extent records. */
+ unsigned int i;
+
+ memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents)); /* everything before the extent array */
+
+ for (i = 0; i < src->bui_nextents; i++)
+ memcpy(&dst->bui_extents[i], &src->bui_extents[i],
+ sizeof(struct xfs_map_extent)); /* one extent record per iteration */
+}
+
+/*
+ * This routine is called to create an in-core extent bmap update
+ * item from the bui format structure which was logged on disk.
+ * It allocates an in-core bui, copies the extents from the format
+ * structure into it, and adds the bui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_bui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_bui_log_item *buip;
+ struct xfs_bui_log_format *bui_formatp;
+ size_t len;
+
+ bui_formatp = item->ri_buf[0].iov_base;
+
+ if (item->ri_buf[0].iov_len < xfs_bui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
+ return -EFSCORRUPTED;
}
- return error;
+
+ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
+ return -EFSCORRUPTED;
+ }
+
+ len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
+ if (item->ri_buf[0].iov_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
+ return -EFSCORRUPTED;
+ }
+
+ buip = xfs_bui_init(mp);
+ xfs_bui_copy_format(&buip->bui_format, bui_formatp);
+ atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
+
+ xlog_recover_intent_item(log, &buip->bui_item, lsn,
+ &xfs_bmap_update_defer_type);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_bui_item_ops = { /* log recovery hooks for BUI items */
+ .item_type = XFS_LI_BUI,
+ .commit_pass2 = xlog_recover_bui_commit_pass2,
+};
+
+/*
+ * This routine is called when a BUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding BUI if it
+ * was still in the log. To do this it searches the AIL for the BUI with an id
+ * equal to that in the BUD format structure. If we find it we drop the BUD
+ * reference, which removes the BUI from the AIL and frees it.
+ *
+ * Returns -EFSCORRUPTED if the first recovered buffer is not exactly the size
+ * of struct xfs_bud_log_format, and 0 otherwise. @buffer_list and @lsn are
+ * not used here.
+ */
+STATIC int
+xlog_recover_bud_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_bud_log_format *bud_formatp;
+
+ bud_formatp = item->ri_buf[0].iov_base;
+ if (item->ri_buf[0].iov_len != sizeof(struct xfs_bud_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].iov_base, item->ri_buf[0].iov_len);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_BUI, bud_formatp->bud_bui_id); /* the lookup and release happen in this helper */
+ return 0;
}
+
+const struct xlog_recover_item_ops xlog_bud_item_ops = { /* log recovery hooks for BUD items */
+ .item_type = XFS_LI_BUD,
+ .commit_pass2 = xlog_recover_bud_commit_pass2,
+};