summaryrefslogtreecommitdiff
path: root/fs/f2fs/recovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/recovery.c')
-rw-r--r--fs/f2fs/recovery.c605
1 files changed, 388 insertions, 217 deletions
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e3883db868d8..c3415ebb9f50 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -5,8 +5,10 @@
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
+#include <linux/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
@@ -50,6 +52,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
return false;
+ if (NM_I(sbi)->max_rf_node_blocks &&
+ percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
+ NM_I(sbi)->max_rf_node_blocks)
+ return false;
return true;
}
@@ -76,7 +82,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
if (IS_ERR(inode))
return ERR_CAST(inode);
- err = dquot_initialize(inode);
+ err = f2fs_dquot_initialize(inode);
if (err)
goto err_out;
@@ -86,7 +92,8 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
goto err_out;
}
- entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
+ entry = f2fs_kmem_cache_alloc(fsync_entry_slab,
+ GFP_F2FS_ZERO, true, NULL);
entry->inode = inode;
list_add_tail(&entry->list, head);
@@ -107,14 +114,58 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
kmem_cache_free(fsync_entry_slab, entry);
}
-static int recover_dentry(struct inode *inode, struct page *ipage,
+static int init_recovered_filename(const struct inode *dir,
+ struct f2fs_inode *raw_inode,
+ struct f2fs_filename *fname,
+ struct qstr *usr_fname)
+{
+ int err;
+
+ memset(fname, 0, sizeof(*fname));
+ fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
+ fname->disk_name.name = raw_inode->i_name;
+
+ if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
+ return -ENAMETOOLONG;
+
+ if (!IS_ENCRYPTED(dir)) {
+ usr_fname->name = fname->disk_name.name;
+ usr_fname->len = fname->disk_name.len;
+ fname->usr_fname = usr_fname;
+ }
+
+ /* Compute the hash of the filename */
+ if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
+ /*
+ * In this case the hash isn't computable without the key, so it
+ * was saved on-disk.
+ */
+ if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
+ return -EINVAL;
+ fname->hash = get_unaligned((f2fs_hash_t *)
+ &raw_inode->i_name[fname->disk_name.len]);
+ } else if (IS_CASEFOLDED(dir)) {
+ err = f2fs_init_casefolded_name(dir, fname);
+ if (err)
+ return err;
+ f2fs_hash_filename(dir, fname);
+ /* Case-sensitive match is fine for recovery */
+ f2fs_free_casefolded_name(fname);
+ } else {
+ f2fs_hash_filename(dir, fname);
+ }
+ return 0;
+}
+
+static int recover_dentry(struct inode *inode, struct folio *ifolio,
struct list_head *dir_list)
{
- struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
+ struct f2fs_inode *raw_inode = F2FS_INODE(ifolio);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
- struct fscrypt_name fname;
- struct page *page;
+ struct f2fs_filename fname;
+ struct qstr usr_fname;
+ struct folio *folio;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
int err = 0;
@@ -132,18 +183,11 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
}
dir = entry->inode;
-
- memset(&fname, 0, sizeof(struct fscrypt_name));
- fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
- fname.disk_name.name = raw_inode->i_name;
-
- if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
- WARN_ON(1);
- err = -ENAMETOOLONG;
+ err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
+ if (err)
goto out;
- }
retry:
- de = __f2fs_find_entry(dir, &fname, &page);
+ de = __f2fs_find_entry(dir, &fname, &folio);
if (de && inode->i_ino == le32_to_cpu(de->ino))
goto out_put;
@@ -157,7 +201,7 @@ retry:
goto out_put;
}
- err = dquot_initialize(einode);
+ err = f2fs_dquot_initialize(einode);
if (err) {
iput(einode);
goto out_put;
@@ -168,11 +212,11 @@ retry:
iput(einode);
goto out_put;
}
- f2fs_delete_entry(de, page, dir, einode);
+ f2fs_delete_entry(de, folio, dir, einode);
iput(einode);
goto retry;
- } else if (IS_ERR(page)) {
- err = PTR_ERR(page);
+ } else if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
} else {
err = f2fs_add_dentry(dir, &fname, inode,
inode->i_ino, inode->i_mode);
@@ -182,22 +226,21 @@ retry:
goto out;
out_put:
- f2fs_put_page(page, 0);
+ f2fs_folio_put(folio, false);
out:
if (file_enc_name(inode))
name = "<encrypted>";
else
name = raw_inode->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
- IS_ERR(dir) ? 0 : dir->i_ino, err);
+ f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ifolio), name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
-static int recover_quota_data(struct inode *inode, struct page *page)
+static int recover_quota_data(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
struct iattr attr;
uid_t i_uid = le32_to_cpu(raw->i_uid);
gid_t i_gid = le32_to_cpu(raw->i_gid);
@@ -205,18 +248,18 @@ static int recover_quota_data(struct inode *inode, struct page *page)
memset(&attr, 0, sizeof(attr));
- attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
- attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+ attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
+ attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
- if (!uid_eq(attr.ia_uid, inode->i_uid))
+ if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&nop_mnt_idmap, inode)))
attr.ia_valid |= ATTR_UID;
- if (!gid_eq(attr.ia_gid, inode->i_gid))
+ if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&nop_mnt_idmap, inode)))
attr.ia_valid |= ATTR_GID;
if (!attr.ia_valid)
return 0;
- err = dquot_transfer(inode, &attr);
+ err = dquot_transfer(&nop_mnt_idmap, inode, &attr);
if (err)
set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
@@ -234,15 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
clear_inode_flag(inode, FI_DATA_EXIST);
}
-static int recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
char *name;
int err;
inode->i_mode = le16_to_cpu(raw->i_mode);
- err = recover_quota_data(inode, page);
+ err = recover_quota_data(inode, folio);
if (err)
return err;
@@ -259,29 +303,28 @@ static int recover_inode(struct inode *inode, struct page *page)
i_projid = (projid_t)le32_to_cpu(raw->i_projid);
kprojid = make_kprojid(&init_user_ns, i_projid);
- if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+ if (!projid_eq(kprojid, fi->i_projid)) {
err = f2fs_transfer_project_quota(inode,
kprojid);
if (err)
return err;
- F2FS_I(inode)->i_projid = kprojid;
+ fi->i_projid = kprojid;
}
}
}
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
- inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
- inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
- inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
- inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
-
- F2FS_I(inode)->i_advise = raw->i_advise;
- F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+ inode_set_atime(inode, le64_to_cpu(raw->i_atime),
+ le32_to_cpu(raw->i_atime_nsec));
+ inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
+ le32_to_cpu(raw->i_ctime_nsec));
+ inode_set_mtime(inode, le64_to_cpu(raw->i_mtime),
+ le32_to_cpu(raw->i_mtime_nsec));
+
+ fi->i_advise = raw->i_advise;
+ fi->i_flags = le32_to_cpu(raw->i_flags);
f2fs_set_inode_flags(inode);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
- le16_to_cpu(raw->i_gc_failures);
+ fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
@@ -290,97 +333,151 @@ static int recover_inode(struct inode *inode, struct page *page)
if (file_enc_name(inode))
name = "<encrypted>";
else
- name = F2FS_INODE(page)->i_name;
+ name = F2FS_INODE(folio)->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
+ ino_of_node(folio), name, raw->i_inline);
+ return 0;
+}
+
+static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
+ unsigned int ra_blocks, unsigned int blkaddr,
+ unsigned int next_blkaddr)
+{
+ if (blkaddr + 1 == next_blkaddr)
+ ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
+ ra_blocks * 2);
+ else if (next_blkaddr % BLKS_PER_SEG(sbi))
+ ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
+ ra_blocks / 2);
+ return ra_blocks;
+}
+
+/* Detect looped node chain with Floyd's cycle detection algorithm. */
+static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
+ block_t *blkaddr_fast, bool *is_detecting)
+{
+ unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
+ int i;
+
+ if (!*is_detecting)
+ return 0;
+
+ for (i = 0; i < 2; i++) {
+ struct folio *folio;
+
+ if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) {
+ *is_detecting = false;
+ return 0;
+ }
+
+ folio = f2fs_get_tmp_folio(sbi, *blkaddr_fast);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+
+ if (!is_recoverable_dnode(folio)) {
+ f2fs_folio_put(folio, true);
+ *is_detecting = false;
+ return 0;
+ }
+
+ ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast,
+ next_blkaddr_of_node(folio));
+
+ *blkaddr_fast = next_blkaddr_of_node(folio);
+ f2fs_folio_put(folio, true);
+
+ f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks);
+ }
+
+ if (*blkaddr_fast == blkaddr) {
+ f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u."
+ " Run fsck to fix it.", __func__, blkaddr);
+ return -EINVAL;
+ }
return 0;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
- bool check_only)
+ bool check_only, bool *new_inode)
{
struct curseg_info *curseg;
- struct page *page = NULL;
- block_t blkaddr;
- unsigned int loop_cnt = 0;
- unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
- valid_user_blocks(sbi);
+ block_t blkaddr, blkaddr_fast;
+ bool is_detecting = true;
int err = 0;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ blkaddr_fast = blkaddr;
while (1) {
struct fsync_inode_entry *entry;
+ struct folio *folio;
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
- page = f2fs_get_tmp_page(sbi, blkaddr);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
+ folio = f2fs_get_tmp_folio(sbi, blkaddr);
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
break;
}
- if (!is_recoverable_dnode(page))
+ if (!is_recoverable_dnode(folio)) {
+ f2fs_folio_put(folio, true);
break;
+ }
- if (!is_fsync_dnode(page))
+ if (!is_fsync_dnode(folio))
goto next;
- entry = get_fsync_inode(head, ino_of_node(page));
+ entry = get_fsync_inode(head, ino_of_node(folio));
if (!entry) {
bool quota_inode = false;
if (!check_only &&
- IS_INODE(page) && is_dent_dnode(page)) {
- err = f2fs_recover_inode_page(sbi, page);
- if (err)
+ IS_INODE(folio) &&
+ is_dent_dnode(folio)) {
+ err = f2fs_recover_inode_page(sbi, folio);
+ if (err) {
+ f2fs_folio_put(folio, true);
break;
+ }
quota_inode = true;
}
- /*
- * CP | dnode(F) | inode(DF)
- * For this case, we should not give up now.
- */
- entry = add_fsync_inode(sbi, head, ino_of_node(page),
+ entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
if (err == -ENOENT) {
- err = 0;
+ if (check_only)
+ *new_inode = true;
goto next;
}
+ f2fs_folio_put(folio, true);
break;
}
}
entry->blkaddr = blkaddr;
- if (IS_INODE(page) && is_dent_dnode(page))
+ if (IS_INODE(folio) && is_dent_dnode(folio))
entry->last_dentry = blkaddr;
next:
- /* sanity check in order to detect looped node chain */
- if (++loop_cnt >= free_blocks ||
- blkaddr == next_blkaddr_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "%s: detect looped node chain, "
- "blkaddr:%u, next:%u",
- __func__, blkaddr, next_blkaddr_of_node(page));
- err = -EINVAL;
- break;
- }
-
/* check next segment */
- blkaddr = next_blkaddr_of_node(page);
- f2fs_put_page(page, 1);
+ blkaddr = next_blkaddr_of_node(folio);
+ f2fs_folio_put(folio, true);
- f2fs_ra_meta_pages_cond(sbi, blkaddr);
+ err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast,
+ &is_detecting);
+ if (err)
+ break;
}
- f2fs_put_page(page, 1);
return err;
}
@@ -400,11 +497,11 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
- struct page *sum_page, *node_page;
+ struct folio *sum_folio, *node_folio;
struct dnode_of_data tdn = *dn;
nid_t ino, nid;
struct inode *inode;
- unsigned int offset;
+ unsigned int offset, ofs_in_node, max_addrs;
block_t bidx;
int i;
@@ -415,41 +512,52 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
/* Get the previous summary */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
+
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
goto got_it;
}
}
- sum_page = f2fs_get_sum_page(sbi, segno);
- if (IS_ERR(sum_page))
- return PTR_ERR(sum_page);
- sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ sum_folio = f2fs_get_sum_folio(sbi, segno);
+ if (IS_ERR(sum_folio))
+ return PTR_ERR(sum_folio);
+ sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno);
sum = sum_node->entries[blkoff];
- f2fs_put_page(sum_page, 1);
+ f2fs_folio_put(sum_folio, true);
got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
+ ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+
+ max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
+ if (ofs_in_node >= max_addrs) {
+ f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
+ ofs_in_node, dn->inode->i_ino, nid, max_addrs);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY);
+ return -EFSCORRUPTED;
+ }
+
if (dn->inode->i_ino == nid) {
tdn.nid = nid;
- if (!dn->inode_page_locked)
- lock_page(dn->inode_page);
- tdn.node_page = dn->inode_page;
- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ if (!dn->inode_folio_locked)
+ folio_lock(dn->inode_folio);
+ tdn.node_folio = dn->inode_folio;
+ tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
} else if (dn->nid == nid) {
- tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
}
/* Get the node page */
- node_page = f2fs_get_node_page(sbi, nid);
- if (IS_ERR(node_page))
- return PTR_ERR(node_page);
+ node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
+ if (IS_ERR(node_folio))
+ return PTR_ERR(node_folio);
- offset = ofs_of_node(node_page);
- ino = ino_of_node(node_page);
- f2fs_put_page(node_page, 1);
+ offset = ofs_of_node(node_folio);
+ ino = ino_of_node(node_folio);
+ f2fs_folio_put(node_folio, true);
if (ino != dn->inode->i_ino) {
int ret;
@@ -459,7 +567,7 @@ got_it:
if (IS_ERR(inode))
return PTR_ERR(inode);
- ret = dquot_initialize(inode);
+ ret = f2fs_dquot_initialize(inode);
if (ret) {
iput(inode);
return ret;
@@ -475,8 +583,8 @@ got_it:
* if inode page is locked, unlock temporarily, but its reference
* count keeps alive.
*/
- if (ino == dn->inode->i_ino && dn->inode_page_locked)
- unlock_page(dn->inode_page);
+ if (ino == dn->inode->i_ino && dn->inode_folio_locked)
+ folio_unlock(dn->inode_folio);
set_new_dnode(&tdn, inode, NULL, NULL, 0);
if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
@@ -489,70 +597,108 @@ got_it:
out:
if (ino != dn->inode->i_ino)
iput(inode);
- else if (dn->inode_page_locked)
- lock_page(dn->inode_page);
+ else if (dn->inode_folio_locked)
+ folio_lock(dn->inode_folio);
return 0;
truncate_out:
- if (datablock_addr(tdn.inode, tdn.node_page,
- tdn.ofs_in_node) == blkaddr)
+ if (f2fs_data_blkaddr(&tdn) == blkaddr)
f2fs_truncate_data_blocks_range(&tdn, 1);
- if (dn->inode->i_ino == nid && !dn->inode_page_locked)
- unlock_page(dn->inode_page);
+ if (dn->inode->i_ino == nid && !dn->inode_folio_locked)
+ folio_unlock(dn->inode_folio);
return 0;
}
+static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn)
+{
+ int i, err = 0;
+
+ for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) {
+ err = f2fs_reserve_new_block(dn);
+ if (!err)
+ break;
+ }
+
+ return err;
+}
+
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
- struct page *page)
+ struct folio *folio)
{
struct dnode_of_data dn;
struct node_info ni;
- unsigned int start, end;
+ unsigned int start = 0, end = 0, index;
int err = 0, recovered = 0;
/* step 1: recover xattr */
- if (IS_INODE(page)) {
- f2fs_recover_inline_xattr(inode, page);
- } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
- err = f2fs_recover_xattr_data(inode, page);
+ if (IS_INODE(folio)) {
+ err = f2fs_recover_inline_xattr(inode, folio);
+ if (err)
+ goto out;
+ } else if (f2fs_has_xattr_block(ofs_of_node(folio))) {
+ err = f2fs_recover_xattr_data(inode, folio);
if (!err)
recovered++;
goto out;
}
/* step 2: recover inline data */
- if (f2fs_recover_inline_data(inode, page))
+ err = f2fs_recover_inline_data(inode, folio);
+ if (err) {
+ if (err == 1)
+ err = 0;
goto out;
+ }
/* step 3: recover data indices */
- start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
- end = start + ADDRS_PER_PAGE(page, inode);
+ start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode);
+ end = start + ADDRS_PER_PAGE(folio, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_dn;
}
goto out;
}
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
+ f2fs_folio_wait_writeback(dn.node_folio, NODE, true, true);
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err)
goto err;
- f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
- f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+ f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));
- for (; start < end; start++, dn.ofs_in_node++) {
+ if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) {
+ f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_folio),
+ ofs_of_node(folio));
+ err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
+ goto err;
+ }
+
+ for (index = start; index < end; index++, dn.ofs_in_node++) {
block_t src, dest;
- src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ src = f2fs_data_blkaddr(&dn);
+ dest = data_blkaddr(dn.inode, folio, dn.ofs_in_node);
+
+ if (__is_valid_data_blkaddr(src) &&
+ !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
+ if (__is_valid_data_blkaddr(dest) &&
+ !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ err = -EFSCORRUPTED;
+ goto err;
+ }
/* skip recovering if dest is the same as src */
if (src == dest)
@@ -565,9 +711,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
- (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+ (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT)))
f2fs_i_size_write(inode,
- (loff_t)(start + 1) << PAGE_SHIFT);
+ (loff_t)(index + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -575,20 +721,17 @@ retry_dn:
*/
if (dest == NEW_ADDR) {
f2fs_truncate_data_blocks_range(&dn, 1);
- f2fs_reserve_new_block(&dn);
+
+ err = f2fs_reserve_new_block_retry(&dn);
+ if (err)
+ goto err;
continue;
}
/* dest is valid block, try to recover from src to dest */
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
-
if (src == NULL_ADDR) {
- err = f2fs_reserve_new_block(&dn);
- while (err &&
- IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
- err = f2fs_reserve_new_block(&dn);
- /* We should not get -ENOSPC */
- f2fs_bug_on(sbi, err);
+ err = f2fs_reserve_new_block_retry(&dn);
if (err)
goto err;
}
@@ -597,12 +740,20 @@ retry_prev:
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_prev;
}
goto err;
}
+ if (f2fs_is_valid_blkaddr(sbi, dest,
+ DATA_GENERIC_ENHANCE_UPDATE)) {
+ f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
+ dest, inode->i_ino, dn.ofs_in_node);
+ err = -EFSCORRUPTED;
+ goto err;
+ }
+
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
ni.version, false, false);
@@ -610,18 +761,18 @@ retry_prev:
}
}
- copy_node_footer(dn.node_page, page);
- fill_node_footer(dn.node_page, dn.nid, ni.ino,
- ofs_of_node(page), false);
- set_page_dirty(dn.node_page);
+ copy_node_footer(dn.node_folio, folio);
+ fill_node_footer(dn.node_folio, dn.nid, ni.ino,
+ ofs_of_node(folio), false);
+ folio_mark_dirty(dn.node_folio);
err:
f2fs_put_dnode(&dn);
out:
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino,
- file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
+ "range (%u, %u), recovered = %d, err = %d",
+ inode->i_ino, nid_of_node(folio),
+ file_keep_isize(inode) ? "keep" : "recover",
+ start, end, recovered, err);
return err;
}
@@ -629,9 +780,17 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct list_head *tmp_inode_list, struct list_head *dir_list)
{
struct curseg_info *curseg;
- struct page *page = NULL;
int err = 0;
block_t blkaddr;
+ unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
+ unsigned int recoverable_dnode = 0;
+ unsigned int fsynced_dnode = 0;
+ unsigned int total_dnode = 0;
+ unsigned int recovered_inode = 0;
+ unsigned int recovered_dentry = 0;
+ unsigned int recovered_dnode = 0;
+
+ f2fs_notice(sbi, "do_recover_data: start to recover dnode");
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -639,103 +798,101 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
+ struct folio *folio;
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
- f2fs_ra_meta_pages_cond(sbi, blkaddr);
-
- page = f2fs_get_tmp_page(sbi, blkaddr);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
+ folio = f2fs_get_tmp_folio(sbi, blkaddr);
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
break;
}
- if (!is_recoverable_dnode(page)) {
- f2fs_put_page(page, 1);
+ if (!is_recoverable_dnode(folio)) {
+ f2fs_folio_put(folio, true);
break;
}
+ recoverable_dnode++;
- entry = get_fsync_inode(inode_list, ino_of_node(page));
+ entry = get_fsync_inode(inode_list, ino_of_node(folio));
if (!entry)
goto next;
+ fsynced_dnode++;
/*
* inode(x) | CP | inode(x) | dnode(F)
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (IS_INODE(page)) {
- err = recover_inode(entry->inode, page);
- if (err)
+ if (IS_INODE(folio)) {
+ err = recover_inode(entry->inode, folio);
+ if (err) {
+ f2fs_folio_put(folio, true);
break;
+ }
+ recovered_inode++;
}
if (entry->last_dentry == blkaddr) {
- err = recover_dentry(entry->inode, page, dir_list);
+ err = recover_dentry(entry->inode, folio, dir_list);
if (err) {
- f2fs_put_page(page, 1);
+ f2fs_folio_put(folio, true);
break;
}
+ recovered_dentry++;
}
- err = do_recover_data(sbi, entry->inode, page);
+ err = do_recover_data(sbi, entry->inode, folio);
if (err) {
- f2fs_put_page(page, 1);
+ f2fs_folio_put(folio, true);
break;
}
+ recovered_dnode++;
if (entry->blkaddr == blkaddr)
list_move_tail(&entry->list, tmp_inode_list);
next:
+ ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
+ next_blkaddr_of_node(folio));
+
/* check next segment */
- blkaddr = next_blkaddr_of_node(page);
- f2fs_put_page(page, 1);
+ blkaddr = next_blkaddr_of_node(folio);
+ f2fs_folio_put(folio, true);
+
+ f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
+ total_dnode++;
}
if (!err)
- f2fs_allocate_new_segments(sbi);
+ err = f2fs_allocate_new_segments(sbi);
+
+ f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, "
+ "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d",
+ recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode,
+ recovered_dentry, recovered_dnode, err);
return err;
}
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct list_head inode_list, tmp_inode_list;
- struct list_head dir_list;
+ LIST_HEAD(inode_list);
+ LIST_HEAD(tmp_inode_list);
+ LIST_HEAD(dir_list);
int err;
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
-#ifdef CONFIG_QUOTA
- int quota_enabled;
-#endif
-
- if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "recover fsync data on readonly fs");
- sbi->sb->s_flags &= ~SB_RDONLY;
- }
-
-#ifdef CONFIG_QUOTA
- /* Needed for iput() to work correctly and not trash data */
- sbi->sb->s_flags |= SB_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
- quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
-#endif
+ bool new_inode = false;
- fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
- sizeof(struct fsync_inode_entry));
- if (!fsync_entry_slab) {
- err = -ENOMEM;
- goto out;
- }
+ f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
+ "check_only: %d", check_only);
- INIT_LIST_HEAD(&inode_list);
- INIT_LIST_HEAD(&tmp_inode_list);
- INIT_LIST_HEAD(&dir_list);
+ if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
+ f2fs_info(sbi, "recover fsync data on readonly fs");
/* prevent checkpoint */
- mutex_lock(&sbi->cp_mutex);
+ f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
- err = find_fsync_dnodes(sbi, &inode_list, check_only);
- if (err || list_empty(&inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
+ if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
goto skip;
if (check_only) {
@@ -749,10 +906,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
- else {
- /* restore s_flags to let iput() trash data */
- sbi->sb->s_flags = s_flags;
- }
+ else
+ f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
destroy_fsync_dnodes(&inode_list, err);
destroy_fsync_dnodes(&tmp_inode_list, err);
@@ -764,10 +919,20 @@ skip:
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
- } else {
- clear_sbi_flag(sbi, SBI_POR_DOING);
}
- mutex_unlock(&sbi->cp_mutex);
+
+ /*
+ * If fsync data succeeds or there is no fsync data to recover,
+ * and the f2fs is not read only, check and fix zoned block devices'
+ * write pointer consistency.
+ */
+ if (!err)
+ err = f2fs_check_and_fix_write_pointer(sbi);
+
+ if (!err)
+ clear_sbi_flag(sbi, SBI_POR_DOING);
+
+ f2fs_up_write(&sbi->cp_global_sem);
/* let's drop all the directory inodes for clean checkpoint */
destroy_fsync_dnodes(&dir_list, err);
@@ -779,18 +944,24 @@ skip:
struct cp_control cpc = {
.reason = CP_RECOVERY,
};
+ stat_inc_cp_call_count(sbi, TOTAL_CALL);
err = f2fs_write_checkpoint(sbi, &cpc);
}
}
- kmem_cache_destroy(fsync_entry_slab);
-out:
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- if (quota_enabled)
- f2fs_quota_off_umount(sbi->sb);
-#endif
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
- return ret ? ret: err;
+ return ret ? ret : err;
+}
+
+int __init f2fs_create_recovery_cache(void)
+{
+ fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
+ sizeof(struct fsync_inode_entry));
+ return fsync_entry_slab ? 0 : -ENOMEM;
+}
+
+void f2fs_destroy_recovery_cache(void)
+{
+ kmem_cache_destroy(fsync_entry_slab);
}