diff options
Diffstat (limited to 'fs/f2fs/recovery.c')
| -rw-r--r-- | fs/f2fs/recovery.c | 605 |
1 files changed, 388 insertions, 217 deletions
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e3883db868d8..c3415ebb9f50 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -5,8 +5,10 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> +#include <linux/sched/mm.h> #include "f2fs.h" #include "node.h" #include "segment.h" @@ -50,6 +52,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) return false; + if (NM_I(sbi)->max_rf_node_blocks && + percpu_counter_sum_positive(&sbi->rf_node_block_count) >= + NM_I(sbi)->max_rf_node_blocks) + return false; return true; } @@ -76,7 +82,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, if (IS_ERR(inode)) return ERR_CAST(inode); - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) goto err_out; @@ -86,7 +92,8 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, goto err_out; } - entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); + entry = f2fs_kmem_cache_alloc(fsync_entry_slab, + GFP_F2FS_ZERO, true, NULL); entry->inode = inode; list_add_tail(&entry->list, head); @@ -107,14 +114,58 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) kmem_cache_free(fsync_entry_slab, entry); } -static int recover_dentry(struct inode *inode, struct page *ipage, +static int init_recovered_filename(const struct inode *dir, + struct f2fs_inode *raw_inode, + struct f2fs_filename *fname, + struct qstr *usr_fname) +{ + int err; + + memset(fname, 0, sizeof(*fname)); + fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen); + fname->disk_name.name = raw_inode->i_name; + + if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN)) + return -ENAMETOOLONG; + + if (!IS_ENCRYPTED(dir)) { + usr_fname->name = fname->disk_name.name; + usr_fname->len = fname->disk_name.len; + fname->usr_fname = usr_fname; + } + + /* Compute the hash of the filename */ + if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) { + /* + * In this case the hash isn't computable without the key, so it + * was saved on-disk. + */ + if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN) + return -EINVAL; + fname->hash = get_unaligned((f2fs_hash_t *) + &raw_inode->i_name[fname->disk_name.len]); + } else if (IS_CASEFOLDED(dir)) { + err = f2fs_init_casefolded_name(dir, fname); + if (err) + return err; + f2fs_hash_filename(dir, fname); + /* Case-sensitive match is fine for recovery */ + f2fs_free_casefolded_name(fname); + } else { + f2fs_hash_filename(dir, fname); + } + return 0; +} + +static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(ipage); + struct f2fs_inode *raw_inode = F2FS_INODE(ifolio); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; - struct fscrypt_name fname; - struct page *page; + struct f2fs_filename fname; + struct qstr usr_fname; + struct folio *folio; struct inode *dir, *einode; struct fsync_inode_entry *entry; int err = 0; @@ -132,18 +183,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage, } dir = entry->inode; - - memset(&fname, 0, sizeof(struct fscrypt_name)); - fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen); - fname.disk_name.name = raw_inode->i_name; - - if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) { - WARN_ON(1); - err = -ENAMETOOLONG; + err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname); + if (err) goto out; - } retry: - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_put; @@ -157,7 +201,7 @@ retry: goto out_put; } - err = dquot_initialize(einode); + err = f2fs_dquot_initialize(einode); if (err) { iput(einode); goto out_put; @@ -168,11 +212,11 @@ retry: iput(einode); goto out_put; } - f2fs_delete_entry(de, page, dir, einode); + f2fs_delete_entry(de, folio, dir, einode); iput(einode); goto retry; - } else if (IS_ERR(page)) { - err = PTR_ERR(page); + } else if (IS_ERR(folio)) { + err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, inode->i_ino, inode->i_mode); @@ -182,22 +226,21 @@ retry: goto out; out_put: - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); out: if (file_enc_name(inode)) name = "<encrypted>"; else name = raw_inode->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, - IS_ERR(dir) ? 0 : dir->i_ino, err); + f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ifolio), name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } -static int recover_quota_data(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -205,18 +248,18 @@ static int recover_quota_data(struct inode *inode, struct page *page) memset(&attr, 0, sizeof(attr)); - attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid); - attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid); + attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid)); + attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid)); - if (!uid_eq(attr.ia_uid, inode->i_uid)) + if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&nop_mnt_idmap, inode))) attr.ia_valid |= ATTR_UID; - if (!gid_eq(attr.ia_gid, inode->i_gid)) + if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&nop_mnt_idmap, inode))) attr.ia_valid |= ATTR_GID; if (!attr.ia_valid) return 0; - err = dquot_transfer(inode, &attr); + err = dquot_transfer(&nop_mnt_idmap, inode, &attr); if (err) set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); return err; @@ -234,15 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static int recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); + struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, page); + err = recover_quota_data(inode, folio); if (err) return err; @@ -259,29 +303,28 @@ static int recover_inode(struct inode *inode, struct page *page) i_projid = (projid_t)le32_to_cpu(raw->i_projid); kprojid = make_kprojid(&init_user_ns, i_projid); - if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + if (!projid_eq(kprojid, fi->i_projid)) { err = f2fs_transfer_project_quota(inode, kprojid); if (err) return err; - F2FS_I(inode)->i_projid = kprojid; + fi->i_projid = kprojid; } } } f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); - inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); - inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); - - F2FS_I(inode)->i_advise = raw->i_advise; - F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + inode_set_atime(inode, le64_to_cpu(raw->i_atime), + le32_to_cpu(raw->i_atime_nsec)); + inode_set_ctime(inode, le64_to_cpu(raw->i_ctime), + le32_to_cpu(raw->i_ctime_nsec)); + inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), + le32_to_cpu(raw->i_mtime_nsec)); + + fi->i_advise = raw->i_advise; + fi->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(raw->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); @@ -290,97 +333,151 @@ static int recover_inode(struct inode *inode, struct page *page) if (file_enc_name(inode)) name = "<encrypted>"; else - name = F2FS_INODE(page)->i_name; + name = F2FS_INODE(folio)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(folio), name, raw->i_inline); + return 0; +} + +static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, + unsigned int ra_blocks, unsigned int blkaddr, + unsigned int next_blkaddr) +{ + if (blkaddr + 1 == next_blkaddr) + ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, + ra_blocks * 2); + else if (next_blkaddr % BLKS_PER_SEG(sbi)) + ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, + ra_blocks / 2); + return ra_blocks; +} + +/* Detect looped node chain with Floyd's cycle detection algorithm. */ +static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, + block_t *blkaddr_fast, bool *is_detecting) +{ + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + int i; + + if (!*is_detecting) + return 0; + + for (i = 0; i < 2; i++) { + struct folio *folio; + + if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) { + *is_detecting = false; + return 0; + } + + folio = f2fs_get_tmp_folio(sbi, *blkaddr_fast); + if (IS_ERR(folio)) + return PTR_ERR(folio); + + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); + *is_detecting = false; + return 0; + } + + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast, + next_blkaddr_of_node(folio)); + + *blkaddr_fast = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); + + f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks); + } + + if (*blkaddr_fast == blkaddr) { + f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u." + " Run fsck to fix it.", __func__, blkaddr); + return -EINVAL; + } return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, - bool check_only) + bool check_only, bool *new_inode) { struct curseg_info *curseg; - struct page *page = NULL; - block_t blkaddr; - unsigned int loop_cnt = 0; - unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - - valid_user_blocks(sbi); + block_t blkaddr, blkaddr_fast; + bool is_detecting = true; int err = 0; /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + blkaddr_fast = blkaddr; while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); break; + } - if (!is_fsync_dnode(page)) + if (!is_fsync_dnode(folio)) goto next; - entry = get_fsync_inode(head, ino_of_node(page)); + entry = get_fsync_inode(head, ino_of_node(folio)); if (!entry) { bool quota_inode = false; if (!check_only && - IS_INODE(page) && is_dent_dnode(page)) { - err = f2fs_recover_inode_page(sbi, page); - if (err) + IS_INODE(folio) && + is_dent_dnode(folio)) { + err = f2fs_recover_inode_page(sbi, folio); + if (err) { + f2fs_folio_put(folio, true); break; + } quota_inode = true; } - /* - * CP | dnode(F) | inode(DF) - * For this case, we should not give up now. - */ - entry = add_fsync_inode(sbi, head, ino_of_node(page), + entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); + /* + * CP | dnode(F) | inode(DF) + * For this case, we should not give up now. + */ if (err == -ENOENT) { - err = 0; + if (check_only) + *new_inode = true; goto next; } + f2fs_folio_put(folio, true); break; } } entry->blkaddr = blkaddr; - if (IS_INODE(page) && is_dent_dnode(page)) + if (IS_INODE(folio) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: - /* sanity check in order to detect looped node chain */ - if (++loop_cnt >= free_blocks || - blkaddr == next_blkaddr_of_node(page)) { - f2fs_msg(sbi->sb, KERN_NOTICE, - "%s: detect looped node chain, " - "blkaddr:%u, next:%u", - __func__, blkaddr, next_blkaddr_of_node(page)); - err = -EINVAL; - break; - } - /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); - f2fs_ra_meta_pages_cond(sbi, blkaddr); + err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast, + &is_detecting); + if (err) + break; } - f2fs_put_page(page, 1); return err; } @@ -400,11 +497,11 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); struct f2fs_summary_block *sum_node; struct f2fs_summary sum; - struct page *sum_page, *node_page; + struct folio *sum_folio, *node_folio; struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; - unsigned int offset; + unsigned int offset, ofs_in_node, max_addrs; block_t bidx; int i; @@ -415,41 +512,52 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Get the previous summary */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { sum = curseg->sum_blk->entries[blkoff]; goto got_it; } } - sum_page = f2fs_get_sum_page(sbi, segno); - if (IS_ERR(sum_page)) - return PTR_ERR(sum_page); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); + sum_folio = f2fs_get_sum_folio(sbi, segno); + if (IS_ERR(sum_folio)) + return PTR_ERR(sum_folio); + sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno); sum = sum_node->entries[blkoff]; - f2fs_put_page(sum_page, 1); + f2fs_folio_put(sum_folio, true); got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); + ofs_in_node = le16_to_cpu(sum.ofs_in_node); + + max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode); + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", + ofs_in_node, dn->inode->i_ino, nid, max_addrs); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY); + return -EFSCORRUPTED; + } + if (dn->inode->i_ino == nid) { tdn.nid = nid; - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - tdn.node_page = dn->inode_page; - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + if (!dn->inode_folio_locked) + folio_lock(dn->inode_folio); + tdn.node_folio = dn->inode_folio; + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } else if (dn->nid == nid) { - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } /* Get the node page */ - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); + node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR); + if (IS_ERR(node_folio)) + return PTR_ERR(node_folio); - offset = ofs_of_node(node_page); - ino = ino_of_node(node_page); - f2fs_put_page(node_page, 1); + offset = ofs_of_node(node_folio); + ino = ino_of_node(node_folio); + f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { int ret; @@ -459,7 +567,7 @@ got_it: if (IS_ERR(inode)) return PTR_ERR(inode); - ret = dquot_initialize(inode); + ret = f2fs_dquot_initialize(inode); if (ret) { iput(inode); return ret; @@ -475,8 +583,8 @@ got_it: * if inode page is locked, unlock temporarily, but its reference * count keeps alive. */ - if (ino == dn->inode->i_ino && dn->inode_page_locked) - unlock_page(dn->inode_page); + if (ino == dn->inode->i_ino && dn->inode_folio_locked) + folio_unlock(dn->inode_folio); set_new_dnode(&tdn, inode, NULL, NULL, 0); if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) @@ -489,70 +597,108 @@ got_it: out: if (ino != dn->inode->i_ino) iput(inode); - else if (dn->inode_page_locked) - lock_page(dn->inode_page); + else if (dn->inode_folio_locked) + folio_lock(dn->inode_folio); return 0; truncate_out: - if (datablock_addr(tdn.inode, tdn.node_page, - tdn.ofs_in_node) == blkaddr) + if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); - if (dn->inode->i_ino == nid && !dn->inode_page_locked) - unlock_page(dn->inode_page); + if (dn->inode->i_ino == nid && !dn->inode_folio_locked) + folio_unlock(dn->inode_folio); return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page) + struct folio *folio) { struct dnode_of_data dn; struct node_info ni; - unsigned int start, end; + unsigned int start = 0, end = 0, index; int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(page)) { - f2fs_recover_inline_xattr(inode, page); - } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = f2fs_recover_xattr_data(inode, page); + if (IS_INODE(folio)) { + err = f2fs_recover_inline_xattr(inode, folio); + if (err) + goto out; + } else if (f2fs_has_xattr_block(ofs_of_node(folio))) { + err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; goto out; } /* step 2: recover inline data */ - if (f2fs_recover_inline_data(inode, page)) + err = f2fs_recover_inline_data(inode, folio); + if (err) { + if (err == 1) + err = 0; goto out; + } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); - end = start + ADDRS_PER_PAGE(page, inode); + start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); + end = start + ADDRS_PER_PAGE(folio, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_NOFS); goto retry_dn; } goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn.node_folio, NODE, true, true); - err = f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); - f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); - for (; start < end; start++, dn.ofs_in_node++) { + if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) { + f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_folio), + ofs_of_node(folio)); + err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + goto err; + } + + for (index = start; index < end; index++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + src = f2fs_data_blkaddr(&dn); + dest = data_blkaddr(dn.inode, folio, dn.ofs_in_node); + + if (__is_valid_data_blkaddr(src) && + !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { + err = -EFSCORRUPTED; + goto err; + } + + if (__is_valid_data_blkaddr(dest) && + !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { + err = -EFSCORRUPTED; + goto err; + } /* skip recovering if dest is the same as src */ if (src == dest) @@ -565,9 +711,9 @@ retry_dn: } if (!file_keep_isize(inode) && - (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT))) f2fs_i_size_write(inode, - (loff_t)(start + 1) << PAGE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -575,20 +721,17 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - f2fs_reserve_new_block(&dn); + + err = f2fs_reserve_new_block_retry(&dn); + if (err) + goto err; continue; } /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - err = f2fs_reserve_new_block(&dn); - while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) - err = f2fs_reserve_new_block(&dn); - /* We should not get -ENOSPC */ - f2fs_bug_on(sbi, err); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } @@ -597,12 +740,20 @@ retry_prev: err = check_index_in_prev_nodes(sbi, dest, &dn); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + memalloc_retry_wait(GFP_NOFS); goto retry_prev; } goto err; } + if (f2fs_is_valid_blkaddr(sbi, dest, + DATA_GENERIC_ENHANCE_UPDATE)) { + f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", + dest, inode->i_ino, dn.ofs_in_node); + err = -EFSCORRUPTED; + goto err; + } + /* write dummy data page */ f2fs_replace_block(sbi, &dn, src, dest, ni.version, false, false); @@ -610,18 +761,18 @@ retry_prev: } } - copy_node_footer(dn.node_page, page); - fill_node_footer(dn.node_page, dn.nid, ni.ino, - ofs_of_node(page), false); - set_page_dirty(dn.node_page); + copy_node_footer(dn.node_folio, folio); + fill_node_footer(dn.node_folio, dn.nid, ni.ino, + ofs_of_node(folio), false); + folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); out: - f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, - file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " + "range (%u, %u), recovered = %d, err = %d", + inode->i_ino, nid_of_node(folio), + file_keep_isize(inode) ? "keep" : "recover", + start, end, recovered, err); return err; } @@ -629,9 +780,17 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; - struct page *page = NULL; int err = 0; block_t blkaddr; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + unsigned int recoverable_dnode = 0; + unsigned int fsynced_dnode = 0; + unsigned int total_dnode = 0; + unsigned int recovered_inode = 0; + unsigned int recovered_dentry = 0; + unsigned int recovered_dnode = 0; + + f2fs_notice(sbi, "do_recover_data: start to recover dnode"); /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -639,103 +798,101 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - f2fs_ra_meta_pages_cond(sbi, blkaddr); - - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); break; } + recoverable_dnode++; - entry = get_fsync_inode(inode_list, ino_of_node(page)); + entry = get_fsync_inode(inode_list, ino_of_node(folio)); if (!entry) goto next; + fsynced_dnode++; /* * inode(x) | CP | inode(x) | dnode(F) * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); - if (err) + if (IS_INODE(folio)) { + err = recover_inode(entry->inode, folio); + if (err) { + f2fs_folio_put(folio, true); break; + } + recovered_inode++; } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, page, dir_list); + err = recover_dentry(entry->inode, folio, dir_list); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } + recovered_dentry++; } - err = do_recover_data(sbi, entry->inode, page); + err = do_recover_data(sbi, entry->inode, folio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } + recovered_dnode++; if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); next: + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(folio)); + /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); + + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + total_dnode++; } if (!err) - f2fs_allocate_new_segments(sbi); + err = f2fs_allocate_new_segments(sbi); + + f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, " + "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d", + recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode, + recovered_dentry, recovered_dnode, err); return err; } int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list, tmp_inode_list; - struct list_head dir_list; + LIST_HEAD(inode_list); + LIST_HEAD(tmp_inode_list); + LIST_HEAD(dir_list); int err; int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; -#ifdef CONFIG_QUOTA - int quota_enabled; -#endif - - if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, - "recover fsync data on readonly fs"); - sbi->sb->s_flags &= ~SB_RDONLY; - } - -#ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= SB_ACTIVE; - /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); -#endif + bool new_inode = false; - fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", - sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) { - err = -ENOMEM; - goto out; - } + f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " + "check_only: %d", check_only); - INIT_LIST_HEAD(&inode_list); - INIT_LIST_HEAD(&tmp_inode_list); - INIT_LIST_HEAD(&dir_list); + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) + f2fs_info(sbi, "recover fsync data on readonly fs"); /* prevent checkpoint */ - mutex_lock(&sbi->cp_mutex); + f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list, check_only); - if (err || list_empty(&inode_list)) + err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode); + if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode))) goto skip; if (check_only) { @@ -749,10 +906,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); - else { - /* restore s_flags to let iput() trash data */ - sbi->sb->s_flags = s_flags; - } + else + f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); skip: destroy_fsync_dnodes(&inode_list, err); destroy_fsync_dnodes(&tmp_inode_list, err); @@ -764,10 +919,20 @@ skip: if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); - } else { - clear_sbi_flag(sbi, SBI_POR_DOING); } - mutex_unlock(&sbi->cp_mutex); + + /* + * If fsync data succeeds or there is no fsync data to recover, + * and the f2fs is not read only, check and fix zoned block devices' + * write pointer consistency. + */ + if (!err) + err = f2fs_check_and_fix_write_pointer(sbi); + + if (!err) + clear_sbi_flag(sbi, SBI_POR_DOING); + + f2fs_up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); @@ -779,18 +944,24 @@ skip: struct cp_control cpc = { .reason = CP_RECOVERY, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } } - kmem_cache_destroy(fsync_entry_slab); -out: -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - if (quota_enabled) - f2fs_quota_off_umount(sbi->sb); -#endif sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ - return ret ? ret: err; + return ret ? ret : err; +} + +int __init f2fs_create_recovery_cache(void) +{ + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry)); + return fsync_entry_slab ? 0 : -ENOMEM; +} + +void f2fs_destroy_recovery_cache(void) +{ + kmem_cache_destroy(fsync_entry_slab); } |
