summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c248
1 files changed, 148 insertions, 100 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1251c81e55f9..2f76531842f8 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -361,7 +361,7 @@ xlog_find_verify_cycle(
*new_blk = -1;
out:
- kmem_free(buffer);
+ kvfree(buffer);
return error;
}
@@ -477,7 +477,7 @@ xlog_find_verify_log_record(
*last_blk = i;
out:
- kmem_free(buffer);
+ kvfree(buffer);
return error;
}
@@ -731,7 +731,7 @@ validate_head:
goto out_free_buffer;
}
- kmem_free(buffer);
+ kvfree(buffer);
if (head_blk == log_bbnum)
*return_head_blk = 0;
else
@@ -745,7 +745,7 @@ validate_head:
return 0;
out_free_buffer:
- kmem_free(buffer);
+ kvfree(buffer);
if (error)
xfs_warn(log->l_mp, "failed to find log head");
return error;
@@ -999,7 +999,7 @@ xlog_verify_tail(
"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
orig_tail, *tail_blk);
out:
- kmem_free(buffer);
+ kvfree(buffer);
return error;
}
@@ -1046,7 +1046,7 @@ xlog_verify_head(
error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
XLOG_MAX_ICLOGS, tmp_buffer,
&tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
- kmem_free(tmp_buffer);
+ kvfree(tmp_buffer);
if (error < 0)
return error;
@@ -1177,8 +1177,8 @@ xlog_check_unmount_rec(
*/
xlog_assign_atomic_lsn(&log->l_tail_lsn,
log->l_curr_cycle, after_umount_blk);
- xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
- log->l_curr_cycle, after_umount_blk);
+ log->l_ailp->ail_head_lsn =
+ atomic64_read(&log->l_tail_lsn);
*tail_blk = after_umount_blk;
*clean = true;
@@ -1212,11 +1212,7 @@ xlog_set_state(
if (bump_cycle)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
- atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
- xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
- xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
- BBTOB(log->l_curr_block));
+ log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn);
}
/*
@@ -1340,7 +1336,7 @@ xlog_find_tail(
* headers if we have a filesystem using non-persistent counters.
*/
if (clean)
- set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate);
+ xfs_set_clean(log->l_mp);
/*
* Make sure that there are no blocks in front of the head
@@ -1365,7 +1361,7 @@ xlog_find_tail(
error = xlog_clear_stale_blocks(log, tail_lsn);
done:
- kmem_free(buffer);
+ kvfree(buffer);
if (error)
xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1399,6 +1395,7 @@ xlog_find_zeroed(
xfs_daddr_t new_blk, last_blk, start_blk;
xfs_daddr_t num_scan_bblks;
int error, log_bbnum = log->l_logBBsize;
+ int ret = 1;
*blk_no = 0;
@@ -1413,8 +1410,7 @@ xlog_find_zeroed(
first_cycle = xlog_get_cycle(offset);
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
- kmem_free(buffer);
- return 1;
+ goto out_free_buffer;
}
/* check partially zeroed log */
@@ -1424,8 +1420,8 @@ xlog_find_zeroed(
last_cycle = xlog_get_cycle(offset);
if (last_cycle != 0) { /* log completely written to */
- kmem_free(buffer);
- return 0;
+ ret = 0;
+ goto out_free_buffer;
}
/* we have a partially zeroed log */
@@ -1471,10 +1467,10 @@ xlog_find_zeroed(
*blk_no = last_blk;
out_free_buffer:
- kmem_free(buffer);
+ kvfree(buffer);
if (error)
return error;
- return 1;
+ return ret;
}
/*
@@ -1583,7 +1579,7 @@ xlog_write_log_records(
}
out_free_buffer:
- kmem_free(buffer);
+ kvfree(buffer);
return error;
}
@@ -1767,6 +1763,37 @@ xlog_recover_iget(
return 0;
}
+/*
+ * Get an inode so that we can recover a log operation.
+ *
+ * Log intent items that target inodes effectively contain a file handle.
+ * Check that the generation number matches the intent item like we do for
+ * other file handles. Log intent items defined after this validation weakness
+ * was identified must use this function.
+ */
+int
+xlog_recover_iget_handle(
+ struct xfs_mount *mp,
+ xfs_ino_t ino,
+ uint32_t gen,
+ struct xfs_inode **ipp)
+{
+ struct xfs_inode *ip;
+ int error;
+
+ error = xlog_recover_iget(mp, ino, &ip);
+ if (error)
+ return error;
+
+ if (VFS_I(ip)->i_generation != gen) {
+ xfs_irele(ip);
+ return -EFSCORRUPTED;
+ }
+
+ *ipp = ip;
+ return 0;
+}
+
/******************************************************************************
*
* Log recover routines
@@ -1789,6 +1816,14 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
&xlog_bud_item_ops,
&xlog_attri_item_ops,
&xlog_attrd_item_ops,
+ &xlog_xmi_item_ops,
+ &xlog_xmd_item_ops,
+ &xlog_rtefi_item_ops,
+ &xlog_rtefd_item_ops,
+ &xlog_rtrui_item_ops,
+ &xlog_rtrud_item_ops,
+ &xlog_rtcui_item_ops,
+ &xlog_rtcud_item_ops,
};
static const struct xlog_recover_item_ops *
@@ -1820,7 +1855,7 @@ xlog_find_item_ops(
* from the transaction. However, we can't do that until after we've
* replayed all the other items because they may be dependent on the
* cancelled buffer and replaying the cancelled buffer can remove it
- * form the cancelled buffer table. Hence they have tobe done last.
+ * form the cancelled buffer table. Hence they have to be done last.
*
* 3. Inode allocation buffers must be replayed before inode items that
* read the buffer and replay changes into it. For filesystems using the
@@ -2057,7 +2092,8 @@ xlog_recover_add_item(
{
struct xlog_recover_item *item;
- item = kmem_zalloc(sizeof(struct xlog_recover_item), 0);
+ item = kzalloc(sizeof(struct xlog_recover_item),
+ GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&item->ri_list);
list_add_tail(&item->ri_list, head);
}
@@ -2098,7 +2134,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL);
+ ptr = kvrealloc(old_ptr, len + old_len, GFP_KERNEL);
if (!ptr)
return -ENOMEM;
memcpy(&ptr[old_len], dp, len);
@@ -2160,7 +2196,7 @@ xlog_recover_add_to_trans(
return 0;
}
- ptr = kmem_alloc(len, 0);
+ ptr = xlog_kvmalloc(len);
memcpy(ptr, dp, len);
in_f = (struct xfs_inode_log_format *)ptr;
@@ -2182,14 +2218,13 @@ xlog_recover_add_to_trans(
"bad number of regions (%d) in inode log format",
in_f->ilf_size);
ASSERT(0);
- kmem_free(ptr);
+ kvfree(ptr);
return -EFSCORRUPTED;
}
item->ri_total = in_f->ilf_size;
- item->ri_buf =
- kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
- 0);
+ item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ GFP_KERNEL | __GFP_NOFAIL);
}
if (item->ri_total <= item->ri_cnt) {
@@ -2197,7 +2232,7 @@ xlog_recover_add_to_trans(
"log item region count (%d) overflowed size (%d)",
item->ri_cnt, item->ri_total);
ASSERT(0);
- kmem_free(ptr);
+ kvfree(ptr);
return -EFSCORRUPTED;
}
@@ -2227,13 +2262,13 @@ xlog_recover_free_trans(
/* Free the regions in the item. */
list_del(&item->ri_list);
for (i = 0; i < item->ri_cnt; i++)
- kmem_free(item->ri_buf[i].i_addr);
+ kvfree(item->ri_buf[i].i_addr);
/* Free the item itself */
- kmem_free(item->ri_buf);
- kmem_free(item);
+ kfree(item->ri_buf);
+ kfree(item);
}
/* Free the transaction recover structure */
- kmem_free(trans);
+ kfree(trans);
}
/*
@@ -2332,7 +2367,7 @@ xlog_recover_ophdr_to_trans(
* This is a new transaction so allocate a new recovery container to
* hold the recovery ops that will follow.
*/
- trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
+ trans = kzalloc(sizeof(struct xlog_recover), GFP_KERNEL | __GFP_NOFAIL);
trans->r_log_tid = tid;
trans->r_lsn = be64_to_cpu(rhead->h_lsn);
INIT_LIST_HEAD(&trans->r_itemq);
@@ -2456,7 +2491,10 @@ xlog_recover_process_data(
ohead = (struct xlog_op_header *)dp;
dp += sizeof(*ohead);
- ASSERT(dp <= end);
+ if (dp > end) {
+ xfs_warn(log->l_mp, "%s: op header overrun", __func__);
+ return -EFSCORRUPTED;
+ }
/* errors will abort recovery */
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
@@ -2645,7 +2683,7 @@ xlog_recover_clear_agi_bucket(
struct xfs_perag *pag,
int bucket)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_trans *tp;
struct xfs_agi *agi;
struct xfs_buf *agibp;
@@ -2656,7 +2694,7 @@ xlog_recover_clear_agi_bucket(
if (error)
goto out_error;
- error = xfs_read_agi(pag, tp, &agibp);
+ error = xfs_read_agi(pag, tp, 0, &agibp);
if (error)
goto out_abort;
@@ -2676,7 +2714,7 @@ out_abort:
xfs_trans_cancel(tp);
out_error:
xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__,
- pag->pag_agno);
+ pag_agno(pag));
return;
}
@@ -2686,7 +2724,7 @@ xlog_recover_iunlink_bucket(
struct xfs_agi *agi,
int bucket)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inode *prev_ip = NULL;
struct xfs_inode *ip;
xfs_agino_t prev_agino, agino;
@@ -2694,9 +2732,8 @@ xlog_recover_iunlink_bucket(
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- error = xfs_iget(mp, NULL,
- XFS_AGINO_TO_INO(mp, pag->pag_agno, agino),
- 0, 0, &ip);
+ error = xfs_iget(mp, NULL, xfs_agino_to_ino(pag, agino), 0, 0,
+ &ip);
if (error)
break;
@@ -2772,7 +2809,7 @@ xlog_recover_iunlink_ag(
int bucket;
int error;
- error = xfs_read_agi(pag, NULL, &agibp);
+ error = xfs_read_agi(pag, NULL, 0, &agibp);
if (error) {
/*
* AGI is b0rked. Don't process it.
@@ -2814,10 +2851,9 @@ static void
xlog_recover_process_iunlinks(
struct xlog *log)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
- for_each_perag(log->l_mp, agno, pag)
+ while ((pag = xfs_perag_next(log->l_mp, pag)))
xlog_recover_iunlink_ag(pag);
}
@@ -2966,7 +3002,7 @@ xlog_do_recovery_pass(
int error = 0, h_size, h_len;
int error2 = 0;
int bblks, split_bblks;
- int hblks, split_hblks, wrapped_hblks;
+ int hblks = 1, split_hblks, wrapped_hblks;
int i;
struct hlist_head rhash[XLOG_RHASH_SIZE];
LIST_HEAD (buffer_list);
@@ -2977,6 +3013,10 @@ xlog_do_recovery_pass(
for (i = 0; i < XLOG_RHASH_SIZE; i++)
INIT_HLIST_HEAD(&rhash[i]);
+ hbp = xlog_alloc_buffer(log, hblks);
+ if (!hbp)
+ return -ENOMEM;
+
/*
* Read the header of the tail block and get the iclog buffer size from
* h_size. Use this to tell how many sectors make up the log header.
@@ -2987,10 +3027,6 @@ xlog_do_recovery_pass(
* iclog header and extract the header size from it. Get a
* new hbp that is the correct size.
*/
- hbp = xlog_alloc_buffer(log, 1);
- if (!hbp)
- return -ENOMEM;
-
error = xlog_bread(log, tail_blk, 1, hbp, &offset);
if (error)
goto bread_err1;
@@ -3022,23 +3058,30 @@ xlog_do_recovery_pass(
if (error)
goto bread_err1;
- hblks = xlog_logrec_hblks(log, rhead);
- if (hblks != 1) {
- kmem_free(hbp);
- hbp = xlog_alloc_buffer(log, hblks);
+ /*
+ * This open codes xlog_logrec_hblks so that we can reuse the
+ * fixed up h_size value calculated above. Without that we'd
+ * still allocate the buffer based on the incorrect on-disk
+ * size.
+ */
+ if (h_size > XLOG_HEADER_CYCLE_SIZE &&
+ (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) {
+ hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
+ if (hblks > 1) {
+ kvfree(hbp);
+ hbp = xlog_alloc_buffer(log, hblks);
+ if (!hbp)
+ return -ENOMEM;
+ }
}
} else {
ASSERT(log->l_sectBBsize == 1);
- hblks = 1;
- hbp = xlog_alloc_buffer(log, 1);
h_size = XLOG_BIG_RECORD_BSIZE;
}
- if (!hbp)
- return -ENOMEM;
dbp = xlog_alloc_buffer(log, BTOBB(h_size));
if (!dbp) {
- kmem_free(hbp);
+ kvfree(hbp);
return -ENOMEM;
}
@@ -3199,16 +3242,33 @@ xlog_do_recovery_pass(
}
bread_err2:
- kmem_free(dbp);
+ kvfree(dbp);
bread_err1:
- kmem_free(hbp);
+ kvfree(hbp);
/*
- * Submit buffers that have been added from the last record processed,
- * regardless of error status.
+ * Submit buffers that have been dirtied by the last record recovered.
*/
- if (!list_empty(&buffer_list))
+ if (!list_empty(&buffer_list)) {
+ if (error) {
+ /*
+ * If there has been an item recovery error then we
+ * cannot allow partial checkpoint writeback to
+ * occur. We might have multiple checkpoints with the
+ * same start LSN in this buffer list, and partial
+ * writeback of a checkpoint in this situation can
+ * prevent future recovery of all the changes in the
+ * checkpoints at this start LSN.
+ *
+ * Note: Shutting down the filesystem will result in the
+ * delwri submission marking all the buffers stale,
+ * completing them and cleaning up _XBF_LOGRECOVERY
+ * state without doing any IO.
+ */
+ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+ }
error2 = xfs_buf_delwri_submit(&buffer_list);
+ }
if (error && first_bad)
*first_bad = rhead_blk;
@@ -3306,14 +3366,13 @@ xlog_do_recover(
/*
* We now update the tail_lsn since much of the recovery has completed
- * and there may be space available to use. If there were no extent
- * or iunlinks, we can free up the entire log and set the tail_lsn to
- * be the last_sync_lsn. This was set in xlog_find_tail to be the
- * lsn of the last known good LR on disk. If there are extent frees
- * or iunlinks they will have some entries in the AIL; so we look at
- * the AIL to determine how to set the tail_lsn.
+ * and there may be space available to use. If there were no extent or
+ * iunlinks, we can free up the entire log. This was set in
+ * xlog_find_tail to be the lsn of the last known good LR on disk. If
+ * there are extent frees or iunlinks they will have some entries in the
+ * AIL; so we look at the AIL to determine how to set the tail_lsn.
*/
- xlog_assign_tail_lsn(mp);
+ xfs_ail_assign_tail_lsn(log->l_ailp);
/*
* Now that we've finished replaying all buffer and inode updates,
@@ -3321,7 +3380,7 @@ xlog_do_recover(
*/
xfs_buf_lock(bp);
xfs_buf_hold(bp);
- error = _xfs_buf_read(bp, XBF_READ);
+ error = _xfs_buf_read(bp);
if (error) {
if (!xlog_is_shutdown(log)) {
xfs_buf_ioerror_alert(bp, __this_address);
@@ -3338,13 +3397,6 @@ xlog_do_recover(
/* re-initialise in-core superblock and geometry structures */
mp->m_features |= xfs_sb_version_to_features(sbp);
xfs_reinit_percpu_counters(mp);
- error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
- &mp->m_maxagi);
- if (error) {
- xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
- return error;
- }
- mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
/* Normal transactions can now occur */
clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
@@ -3443,12 +3495,19 @@ xlog_recover(
* part of recovery so that the root and real-time bitmap inodes can be read in
* from disk in between the two stages. This is necessary so that we can free
* space in the real-time portion of the file system.
+ *
+ * We run this whole process under GFP_NOFS allocation context. We do a
+ * combination of non-transactional and transactional work, yet we really don't
+ * want to recurse into the filesystem from direct reclaim during any of this
+ * processing. This allows all the recovery code run here not to care about the
+ * memory allocation context it is running in.
*/
int
xlog_recover_finish(
struct xlog *log)
{
- int error;
+ unsigned int nofs_flags = memalloc_nofs_save();
+ int error;
error = xlog_recover_process_intents(log);
if (error) {
@@ -3462,7 +3521,7 @@ xlog_recover_finish(
xlog_recover_cancel_intents(log);
xfs_alert(log->l_mp, "Failed to recover intents");
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
- return error;
+ goto out_error;
}
/*
@@ -3472,21 +3531,6 @@ xlog_recover_finish(
*/
xfs_log_force(log->l_mp, XFS_LOG_SYNC);
- /*
- * Now that we've recovered the log and all the intents, we can clear
- * the log incompat feature bits in the superblock because there's no
- * longer anything to protect. We rely on the AIL push to write out the
- * updated superblock after everything else.
- */
- if (xfs_clear_incompat_log_features(log->l_mp)) {
- error = xfs_sync_sb(log->l_mp, false);
- if (error < 0) {
- xfs_alert(log->l_mp,
- "Failed to clear log incompat features on recovery");
- return error;
- }
- }
-
xlog_recover_process_iunlinks(log);
/*
@@ -3508,9 +3552,13 @@ xlog_recover_finish(
* and AIL.
*/
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+ error = 0;
+ goto out_error;
}
- return 0;
+out_error:
+ memalloc_nofs_restore(nofs_flags);
+ return error;
}
void