diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 248 |
1 files changed, 148 insertions, 100 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1251c81e55f9..2f76531842f8 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -361,7 +361,7 @@ xlog_find_verify_cycle( *new_blk = -1; out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -477,7 +477,7 @@ xlog_find_verify_log_record( *last_blk = i; out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -731,7 +731,7 @@ validate_head: goto out_free_buffer; } - kmem_free(buffer); + kvfree(buffer); if (head_blk == log_bbnum) *return_head_blk = 0; else @@ -745,7 +745,7 @@ validate_head: return 0; out_free_buffer: - kmem_free(buffer); + kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to find log head"); return error; @@ -999,7 +999,7 @@ xlog_verify_tail( "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", orig_tail, *tail_blk); out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -1046,7 +1046,7 @@ xlog_verify_head( error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, XLOG_MAX_ICLOGS, tmp_buffer, &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); - kmem_free(tmp_buffer); + kvfree(tmp_buffer); if (error < 0) return error; @@ -1177,8 +1177,8 @@ xlog_check_unmount_rec( */ xlog_assign_atomic_lsn(&log->l_tail_lsn, log->l_curr_cycle, after_umount_blk); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, - log->l_curr_cycle, after_umount_blk); + log->l_ailp->ail_head_lsn = + atomic64_read(&log->l_tail_lsn); *tail_blk = after_umount_blk; *clean = true; @@ -1212,11 +1212,7 @@ xlog_set_state( if (bump_cycle) log->l_curr_cycle++; atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); + log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn); } /* @@ -1340,7 +1336,7 @@ xlog_find_tail( * headers if we have a filesystem using non-persistent counters. */ if (clean) - set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); + xfs_set_clean(log->l_mp); /* * Make sure that there are no blocks in front of the head @@ -1365,7 +1361,7 @@ xlog_find_tail( error = xlog_clear_stale_blocks(log, tail_lsn); done: - kmem_free(buffer); + kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to locate log tail"); @@ -1399,6 +1395,7 @@ xlog_find_zeroed( xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t num_scan_bblks; int error, log_bbnum = log->l_logBBsize; + int ret = 1; *blk_no = 0; @@ -1413,8 +1410,7 @@ xlog_find_zeroed( first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; - kmem_free(buffer); - return 1; + goto out_free_buffer; } /* check partially zeroed log */ @@ -1424,8 +1420,8 @@ xlog_find_zeroed( last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ - kmem_free(buffer); - return 0; + ret = 0; + goto out_free_buffer; } /* we have a partially zeroed log */ @@ -1471,10 +1467,10 @@ xlog_find_zeroed( *blk_no = last_blk; out_free_buffer: - kmem_free(buffer); + kvfree(buffer); if (error) return error; - return 1; + return ret; } /* @@ -1583,7 +1579,7 @@ xlog_write_log_records( } out_free_buffer: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -1767,6 +1763,37 @@ xlog_recover_iget( return 0; } +/* + * Get an inode so that we can recover a log operation. + * + * Log intent items that target inodes effectively contain a file handle. + * Check that the generation number matches the intent item like we do for + * other file handles. Log intent items defined after this validation weakness + * was identified must use this function. + */ +int +xlog_recover_iget_handle( + struct xfs_mount *mp, + xfs_ino_t ino, + uint32_t gen, + struct xfs_inode **ipp) +{ + struct xfs_inode *ip; + int error; + + error = xlog_recover_iget(mp, ino, &ip); + if (error) + return error; + + if (VFS_I(ip)->i_generation != gen) { + xfs_irele(ip); + return -EFSCORRUPTED; + } + + *ipp = ip; + return 0; +} + /****************************************************************************** * * Log recover routines @@ -1789,6 +1816,14 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { &xlog_bud_item_ops, &xlog_attri_item_ops, &xlog_attrd_item_ops, + &xlog_xmi_item_ops, + &xlog_xmd_item_ops, + &xlog_rtefi_item_ops, + &xlog_rtefd_item_ops, + &xlog_rtrui_item_ops, + &xlog_rtrud_item_ops, + &xlog_rtcui_item_ops, + &xlog_rtcud_item_ops, }; static const struct xlog_recover_item_ops * @@ -1820,7 +1855,7 @@ xlog_find_item_ops( * from the transaction. However, we can't do that until after we've * replayed all the other items because they may be dependent on the * cancelled buffer and replaying the cancelled buffer can remove it - * form the cancelled buffer table. Hence they have tobe done last. + * form the cancelled buffer table. Hence they have to be done last. * * 3. Inode allocation buffers must be replayed before inode items that * read the buffer and replay changes into it. For filesystems using the @@ -2057,7 +2092,8 @@ xlog_recover_add_item( { struct xlog_recover_item *item; - item = kmem_zalloc(sizeof(struct xlog_recover_item), 0); + item = kzalloc(sizeof(struct xlog_recover_item), + GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&item->ri_list); list_add_tail(&item->ri_list, head); } @@ -2098,7 +2134,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); + ptr = kvrealloc(old_ptr, len + old_len, GFP_KERNEL); if (!ptr) return -ENOMEM; memcpy(&ptr[old_len], dp, len); @@ -2160,7 +2196,7 @@ xlog_recover_add_to_trans( return 0; } - ptr = kmem_alloc(len, 0); + ptr = xlog_kvmalloc(len); memcpy(ptr, dp, len); in_f = (struct xfs_inode_log_format *)ptr; @@ -2182,14 +2218,13 @@ xlog_recover_add_to_trans( "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); - kmem_free(ptr); + kvfree(ptr); return -EFSCORRUPTED; } item->ri_total = in_f->ilf_size; - item->ri_buf = - kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), - 0); + item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t), + GFP_KERNEL | __GFP_NOFAIL); } if (item->ri_total <= item->ri_cnt) { @@ -2197,7 +2232,7 @@ xlog_recover_add_to_trans( "log item region count (%d) overflowed size (%d)", item->ri_cnt, item->ri_total); ASSERT(0); - kmem_free(ptr); + kvfree(ptr); return -EFSCORRUPTED; } @@ -2227,13 +2262,13 @@ xlog_recover_free_trans( /* Free the regions in the item. */ list_del(&item->ri_list); for (i = 0; i < item->ri_cnt; i++) - kmem_free(item->ri_buf[i].i_addr); + kvfree(item->ri_buf[i].i_addr); /* Free the item itself */ - kmem_free(item->ri_buf); - kmem_free(item); + kfree(item->ri_buf); + kfree(item); } /* Free the transaction recover structure */ - kmem_free(trans); + kfree(trans); } /* @@ -2332,7 +2367,7 @@ xlog_recover_ophdr_to_trans( * This is a new transaction so allocate a new recovery container to * hold the recovery ops that will follow. */ - trans = kmem_zalloc(sizeof(struct xlog_recover), 0); + trans = kzalloc(sizeof(struct xlog_recover), GFP_KERNEL | __GFP_NOFAIL); trans->r_log_tid = tid; trans->r_lsn = be64_to_cpu(rhead->h_lsn); INIT_LIST_HEAD(&trans->r_itemq); @@ -2456,7 +2491,10 @@ xlog_recover_process_data( ohead = (struct xlog_op_header *)dp; dp += sizeof(*ohead); - ASSERT(dp <= end); + if (dp > end) { + xfs_warn(log->l_mp, "%s: op header overrun", __func__); + return -EFSCORRUPTED; + } /* errors will abort recovery */ error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, @@ -2645,7 +2683,7 @@ xlog_recover_clear_agi_bucket( struct xfs_perag *pag, int bucket) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_trans *tp; struct xfs_agi *agi; struct xfs_buf *agibp; @@ -2656,7 +2694,7 @@ xlog_recover_clear_agi_bucket( if (error) goto out_error; - error = xfs_read_agi(pag, tp, &agibp); + error = xfs_read_agi(pag, tp, 0, &agibp); if (error) goto out_abort; @@ -2676,7 +2714,7 @@ out_abort: xfs_trans_cancel(tp); out_error: xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, - pag->pag_agno); + pag_agno(pag)); return; } @@ -2686,7 +2724,7 @@ xlog_recover_iunlink_bucket( struct xfs_agi *agi, int bucket) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_inode *prev_ip = NULL; struct xfs_inode *ip; xfs_agino_t prev_agino, agino; @@ -2694,9 +2732,8 @@ xlog_recover_iunlink_bucket( agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - error = xfs_iget(mp, NULL, - XFS_AGINO_TO_INO(mp, pag->pag_agno, agino), - 0, 0, &ip); + error = xfs_iget(mp, NULL, xfs_agino_to_ino(pag, agino), 0, 0, + &ip); if (error) break; @@ -2772,7 +2809,7 @@ xlog_recover_iunlink_ag( int bucket; int error; - error = xfs_read_agi(pag, NULL, &agibp); + error = xfs_read_agi(pag, NULL, 0, &agibp); if (error) { /* * AGI is b0rked. Don't process it. @@ -2814,10 +2851,9 @@ static void xlog_recover_process_iunlinks( struct xlog *log) { - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; - for_each_perag(log->l_mp, agno, pag) + while ((pag = xfs_perag_next(log->l_mp, pag))) xlog_recover_iunlink_ag(pag); } @@ -2966,7 +3002,7 @@ xlog_do_recovery_pass( int error = 0, h_size, h_len; int error2 = 0; int bblks, split_bblks; - int hblks, split_hblks, wrapped_hblks; + int hblks = 1, split_hblks, wrapped_hblks; int i; struct hlist_head rhash[XLOG_RHASH_SIZE]; LIST_HEAD (buffer_list); @@ -2977,6 +3013,10 @@ xlog_do_recovery_pass( for (i = 0; i < XLOG_RHASH_SIZE; i++) INIT_HLIST_HEAD(&rhash[i]); + hbp = xlog_alloc_buffer(log, hblks); + if (!hbp) + return -ENOMEM; + /* * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. @@ -2987,10 +3027,6 @@ xlog_do_recovery_pass( * iclog header and extract the header size from it. Get a * new hbp that is the correct size. */ - hbp = xlog_alloc_buffer(log, 1); - if (!hbp) - return -ENOMEM; - error = xlog_bread(log, tail_blk, 1, hbp, &offset); if (error) goto bread_err1; @@ -3022,23 +3058,30 @@ xlog_do_recovery_pass( if (error) goto bread_err1; - hblks = xlog_logrec_hblks(log, rhead); - if (hblks != 1) { - kmem_free(hbp); - hbp = xlog_alloc_buffer(log, hblks); + /* + * This open codes xlog_logrec_hblks so that we can reuse the + * fixed up h_size value calculated above. Without that we'd + * still allocate the buffer based on the incorrect on-disk + * size. + */ + if (h_size > XLOG_HEADER_CYCLE_SIZE && + (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) { + hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); + if (hblks > 1) { + kvfree(hbp); + hbp = xlog_alloc_buffer(log, hblks); + if (!hbp) + return -ENOMEM; + } } } else { ASSERT(log->l_sectBBsize == 1); - hblks = 1; - hbp = xlog_alloc_buffer(log, 1); h_size = XLOG_BIG_RECORD_BSIZE; } - if (!hbp) - return -ENOMEM; dbp = xlog_alloc_buffer(log, BTOBB(h_size)); if (!dbp) { - kmem_free(hbp); + kvfree(hbp); return -ENOMEM; } @@ -3199,16 +3242,33 @@ xlog_do_recovery_pass( } bread_err2: - kmem_free(dbp); + kvfree(dbp); bread_err1: - kmem_free(hbp); + kvfree(hbp); /* - * Submit buffers that have been added from the last record processed, - * regardless of error status. + * Submit buffers that have been dirtied by the last record recovered. */ - if (!list_empty(&buffer_list)) + if (!list_empty(&buffer_list)) { + if (error) { + /* + * If there has been an item recovery error then we + * cannot allow partial checkpoint writeback to + * occur. We might have multiple checkpoints with the + * same start LSN in this buffer list, and partial + * writeback of a checkpoint in this situation can + * prevent future recovery of all the changes in the + * checkpoints at this start LSN. + * + * Note: Shutting down the filesystem will result in the + * delwri submission marking all the buffers stale, + * completing them and cleaning up _XBF_LOGRECOVERY + * state without doing any IO. + */ + xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + } error2 = xfs_buf_delwri_submit(&buffer_list); + } if (error && first_bad) *first_bad = rhead_blk; @@ -3306,14 +3366,13 @@ xlog_do_recover( /* * We now update the tail_lsn since much of the recovery has completed - * and there may be space available to use. If there were no extent - * or iunlinks, we can free up the entire log and set the tail_lsn to - * be the last_sync_lsn. This was set in xlog_find_tail to be the - * lsn of the last known good LR on disk. If there are extent frees - * or iunlinks they will have some entries in the AIL; so we look at - * the AIL to determine how to set the tail_lsn. + * and there may be space available to use. If there were no extent or + * iunlinks, we can free up the entire log. This was set in + * xlog_find_tail to be the lsn of the last known good LR on disk. If + * there are extent frees or iunlinks they will have some entries in the + * AIL; so we look at the AIL to determine how to set the tail_lsn. */ - xlog_assign_tail_lsn(mp); + xfs_ail_assign_tail_lsn(log->l_ailp); /* * Now that we've finished replaying all buffer and inode updates, @@ -3321,7 +3380,7 @@ xlog_do_recover( */ xfs_buf_lock(bp); xfs_buf_hold(bp); - error = _xfs_buf_read(bp, XBF_READ); + error = _xfs_buf_read(bp); if (error) { if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); @@ -3338,13 +3397,6 @@ xlog_do_recover( /* re-initialise in-core superblock and geometry structures */ mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); - error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, - &mp->m_maxagi); - if (error) { - xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); - return error; - } - mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); /* Normal transactions can now occur */ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); @@ -3443,12 +3495,19 @@ xlog_recover( * part of recovery so that the root and real-time bitmap inodes can be read in * from disk in between the two stages. This is necessary so that we can free * space in the real-time portion of the file system. + * + * We run this whole process under GFP_NOFS allocation context. We do a + * combination of non-transactional and transactional work, yet we really don't + * want to recurse into the filesystem from direct reclaim during any of this + * processing. This allows all the recovery code run here not to care about the + * memory allocation context it is running in. */ int xlog_recover_finish( struct xlog *log) { - int error; + unsigned int nofs_flags = memalloc_nofs_save(); + int error; error = xlog_recover_process_intents(log); if (error) { @@ -3462,7 +3521,7 @@ xlog_recover_finish( xlog_recover_cancel_intents(log); xfs_alert(log->l_mp, "Failed to recover intents"); xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return error; + goto out_error; } /* @@ -3472,21 +3531,6 @@ xlog_recover_finish( */ xfs_log_force(log->l_mp, XFS_LOG_SYNC); - /* - * Now that we've recovered the log and all the intents, we can clear - * the log incompat feature bits in the superblock because there's no - * longer anything to protect. We rely on the AIL push to write out the - * updated superblock after everything else. - */ - if (xfs_clear_incompat_log_features(log->l_mp)) { - error = xfs_sync_sb(log->l_mp, false); - if (error < 0) { - xfs_alert(log->l_mp, - "Failed to clear log incompat features on recovery"); - return error; - } - } - xlog_recover_process_iunlinks(log); /* @@ -3508,9 +3552,13 @@ xlog_recover_finish( * and AIL. */ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + error = 0; + goto out_error; } - return 0; +out_error: + memalloc_nofs_restore(nofs_flags); + return error; } void |