summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_buf_item_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf_item_recover.c')
-rw-r--r--fs/xfs/xfs_buf_item_recover.c204
1 files changed, 175 insertions, 29 deletions
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 09e893cf563c..e4c8af873632 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -22,6 +22,11 @@
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_quota.h"
+#include "xfs_alloc.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
/*
* This is the number of entries in the l_buf_cancel_table used during
@@ -154,7 +159,7 @@ STATIC enum xlog_recover_reorder
xlog_recover_buf_reorder(
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
if (buf_f->blf_flags & XFS_BLF_CANCEL)
return XLOG_REORDER_CANCEL_LIST;
@@ -168,7 +173,7 @@ xlog_recover_buf_ra_pass2(
struct xlog *log,
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL);
}
@@ -182,11 +187,11 @@ xlog_recover_buf_commit_pass1(
struct xlog *log,
struct xlog_recover_item *item)
{
- struct xfs_buf_log_format *bf = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *bf = item->ri_buf[0].iov_base;
if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
- xfs_err(log->l_mp, "bad buffer log item size (%d)",
- item->ri_buf[0].i_len);
+ xfs_err(log->l_mp, "bad buffer log item size (%zd)",
+ item->ri_buf[0].iov_len);
return -EFSCORRUPTED;
}
@@ -257,12 +262,18 @@ xlog_recover_validate_buf_type(
case XFS_BMAP_MAGIC:
bp->b_ops = &xfs_bmbt_buf_ops;
break;
+ case XFS_RTRMAP_CRC_MAGIC:
+ bp->b_ops = &xfs_rtrmapbt_buf_ops;
+ break;
case XFS_RMAP_CRC_MAGIC:
bp->b_ops = &xfs_rmapbt_buf_ops;
break;
case XFS_REFC_CRC_MAGIC:
bp->b_ops = &xfs_refcountbt_buf_ops;
break;
+ case XFS_RTREFC_CRC_MAGIC:
+ bp->b_ops = &xfs_rtrefcountbt_buf_ops;
+ break;
default:
warnmsg = "Bad btree block magic!";
break;
@@ -390,9 +401,18 @@ xlog_recover_validate_buf_type(
break;
#ifdef CONFIG_XFS_RT
case XFS_BLFT_RTBITMAP_BUF:
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTBITMAP_MAGIC) {
+ warnmsg = "Bad rtbitmap magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_BITMAP);
+ break;
case XFS_BLFT_RTSUMMARY_BUF:
- /* no magic numbers for verification of RT buffers */
- bp->b_ops = &xfs_rtbuf_ops;
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTSUMMARY_MAGIC) {
+ warnmsg = "Bad rtsummary magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_SUMMARY);
break;
#endif /* CONFIG_XFS_RT */
default:
@@ -467,8 +487,8 @@ xlog_recover_do_reg_buffer(
nbits = xfs_contig_bits(buf_f->blf_data_map,
buf_f->blf_map_size, bit);
ASSERT(nbits > 0);
- ASSERT(item->ri_buf[i].i_addr != NULL);
- ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
+ ASSERT(item->ri_buf[i].iov_base != NULL);
+ ASSERT(item->ri_buf[i].iov_len % XFS_BLF_CHUNK == 0);
ASSERT(BBTOB(bp->b_length) >=
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
@@ -480,8 +500,8 @@ xlog_recover_do_reg_buffer(
* the log. Hence we need to trim nbits back to the length of
* the current region being copied out of the log.
*/
- if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
- nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+ if (item->ri_buf[i].iov_len < (nbits << XFS_BLF_SHIFT))
+ nbits = item->ri_buf[i].iov_len >> XFS_BLF_SHIFT;
/*
* Do a sanity check if this is a dquot buffer. Just checking
@@ -491,18 +511,18 @@ xlog_recover_do_reg_buffer(
fa = NULL;
if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- if (item->ri_buf[i].i_addr == NULL) {
+ if (item->ri_buf[i].iov_base == NULL) {
xfs_alert(mp,
"XFS: NULL dquot in %s.", __func__);
goto next;
}
- if (item->ri_buf[i].i_len < size_disk_dquot) {
+ if (item->ri_buf[i].iov_len < size_disk_dquot) {
xfs_alert(mp,
- "XFS: dquot too small (%d) in %s.",
- item->ri_buf[i].i_len, __func__);
+ "XFS: dquot too small (%zd) in %s.",
+ item->ri_buf[i].iov_len, __func__);
goto next;
}
- fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, -1);
+ fa = xfs_dquot_verify(mp, item->ri_buf[i].iov_base, -1);
if (fa) {
xfs_alert(mp,
"dquot corrupt at %pS trying to replay into block 0x%llx",
@@ -513,7 +533,7 @@ xlog_recover_do_reg_buffer(
memcpy(xfs_buf_offset(bp,
(uint)bit << XFS_BLF_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
+ item->ri_buf[i].iov_base, /* source */
nbits<<XFS_BLF_SHIFT); /* length */
next:
i++;
@@ -649,8 +669,8 @@ xlog_recover_do_inode_buffer(
if (next_unlinked_offset < reg_buf_offset)
continue;
- ASSERT(item->ri_buf[item_index].i_addr != NULL);
- ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
+ ASSERT(item->ri_buf[item_index].iov_base != NULL);
+ ASSERT((item->ri_buf[item_index].iov_len % XFS_BLF_CHUNK) == 0);
ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
/*
@@ -658,7 +678,7 @@ xlog_recover_do_inode_buffer(
* current di_next_unlinked field. Extract its value
* and copy it to the buffer copy.
*/
- logged_nextp = item->ri_buf[item_index].i_addr +
+ logged_nextp = item->ri_buf[item_index].iov_base +
next_unlinked_offset - reg_buf_offset;
if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
xfs_alert(mp,
@@ -685,6 +705,100 @@ xlog_recover_do_inode_buffer(
}
/*
+ * Update the in-memory superblock and perag structures from the primary SB
+ * buffer.
+ *
+ * This is required because transactions running after growfs may require the
+ * updated values to be set in a previous fully commit transaction.
+ */
+static int
+xlog_recover_do_primary_sb_buffer(
+ struct xfs_mount *mp,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_dsb *dsb = bp->b_addr;
+ xfs_agnumber_t orig_agcount = mp->m_sb.sb_agcount;
+ xfs_rgnumber_t orig_rgcount = mp->m_sb.sb_rgcount;
+ int error;
+
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+
+ if (orig_agcount == 0) {
+ xfs_alert(mp, "Trying to grow file system without AGs");
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Update the in-core super block from the freshly recovered on-disk one.
+ */
+ xfs_sb_from_disk(&mp->m_sb, dsb);
+
+ /*
+ * Grow can change the device size. Mirror that into the buftarg.
+ */
+ mp->m_ddev_targp->bt_nr_sectors =
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) {
+ mp->m_rtdev_targp->bt_nr_sectors =
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ }
+
+ if (mp->m_sb.sb_agcount < orig_agcount) {
+ xfs_alert(mp, "Shrinking AG count in log recovery not supported");
+ return -EFSCORRUPTED;
+ }
+ if (mp->m_sb.sb_rgcount < orig_rgcount) {
+ xfs_warn(mp,
+ "Shrinking rtgroup count in log recovery not supported");
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * If the last AG was grown or shrunk, we also need to update the
+ * length in the in-core perag structure and values depending on it.
+ */
+ error = xfs_update_last_ag_size(mp, orig_agcount);
+ if (error)
+ return error;
+
+ /*
+ * If the last rtgroup was grown or shrunk, we also need to update the
+ * length in the in-core rtgroup structure and values depending on it.
+ * Ignore this on any filesystem with zero rtgroups.
+ */
+ if (orig_rgcount > 0) {
+ error = xfs_update_last_rtgroup_size(mp, orig_rgcount);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Initialize the new perags, and also update various block and inode
+ * allocator setting based off the number of AGs or total blocks.
+ * Because of the latter this also needs to happen if the agcount did
+ * not change.
+ */
+ error = xfs_initialize_perag(mp, orig_agcount, mp->m_sb.sb_agcount,
+ mp->m_sb.sb_dblocks, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed recovery per-ag init: %d", error);
+ return error;
+ }
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+
+ error = xfs_initialize_rtgroups(mp, orig_rgcount, mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
+ if (error) {
+ xfs_warn(mp, "Failed recovery rtgroup init: %d", error);
+ return error;
+ }
+ return 0;
+}
+
+/*
* V5 filesystems know the age of the buffer on disk being recovered. We can
* have newer objects on disk than we are replaying, and so for these cases we
* don't want to replay the current change as that will make the buffer contents
@@ -727,11 +841,20 @@ xlog_recover_get_buf_lsn(
* UUIDs, so we must recover them immediately.
*/
blft = xfs_blft_from_flags(buf_f);
- if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+ if (!xfs_has_rtgroups(mp) && (blft == XFS_BLFT_RTBITMAP_BUF ||
+ blft == XFS_BLFT_RTSUMMARY_BUF))
goto recover_immediately;
magic32 = be32_to_cpu(*(__be32 *)blk);
switch (magic32) {
+ case XFS_RTSUMMARY_MAGIC:
+ case XFS_RTBITMAP_MAGIC: {
+ struct xfs_rtbuf_blkinfo *hdr = blk;
+
+ lsn = be64_to_cpu(hdr->rt_lsn);
+ uuid = &hdr->rt_uuid;
+ break;
+ }
case XFS_ABTB_CRC_MAGIC:
case XFS_ABTC_CRC_MAGIC:
case XFS_ABTB_MAGIC:
@@ -748,6 +871,8 @@ xlog_recover_get_buf_lsn(
uuid = &btb->bb_u.s.bb_uuid;
break;
}
+ case XFS_RTRMAP_CRC_MAGIC:
+ case XFS_RTREFC_CRC_MAGIC:
case XFS_BMAP_CRC_MAGIC:
case XFS_BMAP_MAGIC: {
struct xfs_btree_block *btb = blk;
@@ -887,11 +1012,10 @@ xlog_recover_buf_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t current_lsn)
{
- struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr;
+ struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base;
struct xfs_mount *mp = log->l_mp;
struct xfs_buf *bp;
int error;
- uint buf_flags;
xfs_lsn_t lsn;
/*
@@ -910,13 +1034,8 @@ xlog_recover_buf_commit_pass2(
}
trace_xfs_log_recover_buf_recover(log, buf_f);
-
- buf_flags = 0;
- if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
- buf_flags |= XBF_UNMAPPED;
-
error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
- buf_flags, &bp, NULL);
+ 0, &bp, NULL);
if (error)
return error;
@@ -967,11 +1086,38 @@ xlog_recover_buf_commit_pass2(
dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
if (!dirty)
goto out_release;
+ } else if ((xfs_blft_from_flags(buf_f) & XFS_BLFT_SB_BUF) &&
+ xfs_buf_daddr(bp) == 0) {
+ error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f,
+ current_lsn);
+ if (error)
+ goto out_writebuf;
+
+ /* Update the rt superblock if we have one. */
+ if (xfs_has_rtsb(mp) && mp->m_rtsb_bp) {
+ struct xfs_buf *rtsb_bp = mp->m_rtsb_bp;
+
+ xfs_buf_lock(rtsb_bp);
+ xfs_buf_hold(rtsb_bp);
+ xfs_update_rtsb(rtsb_bp, bp);
+ rtsb_bp->b_flags |= _XBF_LOGRECOVERY;
+ xfs_buf_delwri_queue(rtsb_bp, buffer_list);
+ xfs_buf_relse(rtsb_bp);
+ }
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
/*
+ * Buffer held by buf log item during 'normal' buffer recovery must
+ * be committed through buffer I/O submission path to ensure proper
+ * release. When error occurs during sb buffer recovery, log shutdown
+ * will be done before submitting buffer list so that buffers can be
+ * released correctly through ioend failure path.
+ */
+out_writebuf:
+
+ /*
* Perform delayed write on the buffer. Asynchronous writes will be
* slower when taking into account all the buffers to be flushed.
*