diff options
Diffstat (limited to 'fs/f2fs')
| -rw-r--r-- | fs/f2fs/Kconfig | 3 | ||||
| -rw-r--r-- | fs/f2fs/acl.c | 42 | ||||
| -rw-r--r-- | fs/f2fs/acl.h | 10 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 477 | ||||
| -rw-r--r-- | fs/f2fs/compress.c | 616 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 1493 | ||||
| -rw-r--r-- | fs/f2fs/debug.c | 206 | ||||
| -rw-r--r-- | fs/f2fs/dir.c | 426 | ||||
| -rw-r--r-- | fs/f2fs/extent_cache.c | 261 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 1403 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 1370 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 696 | ||||
| -rw-r--r-- | fs/f2fs/gc.h | 38 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 379 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 439 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 354 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 1239 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 95 | ||||
| -rw-r--r-- | fs/f2fs/recovery.c | 341 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 1516 | ||||
| -rw-r--r-- | fs/f2fs/segment.h | 315 | ||||
| -rw-r--r-- | fs/f2fs/shrinker.c | 99 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 3263 | ||||
| -rw-r--r-- | fs/f2fs/sysfs.c | 596 | ||||
| -rw-r--r-- | fs/f2fs/verity.c | 31 | ||||
| -rw-r--r-- | fs/f2fs/xattr.c | 199 | ||||
| -rw-r--r-- | fs/f2fs/xattr.h | 32 |
27 files changed, 9595 insertions, 6344 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 68a1e23e1557..5916a02fb46d 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -4,8 +4,7 @@ config F2FS_FS depends on BLOCK select BUFFER_HEAD select NLS - select CRYPTO - select CRYPTO_CRC32 + select CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION select FS_IOMAP diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index ec2aeccb69a3..fa8d81a30fb9 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -9,6 +9,7 @@ * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ +#include <linux/fs_struct.h> #include <linux/f2fs_fs.h> #include "f2fs.h" #include "xattr.h" @@ -166,7 +167,7 @@ fail: } static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, - struct page *dpage) + struct folio *dfolio) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -176,13 +177,13 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dfolio); if (retval > 0) { value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); retval = f2fs_getxattr(inode, name_index, "", value, - retval, dpage); + retval, dfolio); } if (retval > 0) @@ -219,8 +220,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; *mode_p = mode; return 0; @@ -228,7 +228,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap, static int __f2fs_set_acl(struct mnt_idmap *idmap, struct inode *inode, int type, - struct posix_acl *acl, struct page *ipage) + struct posix_acl *acl, struct folio *ifolio) { int name_index; void *value = NULL; @@ -239,9 +239,8 @@ static int __f2fs_set_acl(struct mnt_idmap *idmap, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl && !ipage) { - error = f2fs_acl_update_mode(idmap, inode, - &mode, &acl); + if (acl && !ifolio) { + error = f2fs_acl_update_mode(idmap, inode, &mode, &acl); if (error) return error; set_acl_inode(inode, mode); @@ -266,7 +265,7 @@ static int __f2fs_set_acl(struct mnt_idmap *idmap, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); + error = f2fs_setxattr(inode, name_index, "", value, size, ifolio, 0); kfree(value); if (!error) @@ -297,9 +296,8 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, struct posix_acl *clone = NULL; if (acl) { - int size = sizeof(struct posix_acl) + acl->a_count * - sizeof(struct posix_acl_entry); - clone = kmemdup(acl, size, flags); + clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), + flags); if (clone) refcount_set(&clone->a_refcount, 1); } @@ -362,7 +360,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) static int f2fs_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl, - struct page *dpage) + struct folio *dfolio) { struct posix_acl *p; struct posix_acl *clone; @@ -374,7 +372,7 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) return 0; - p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dfolio); if (!p || p == ERR_PTR(-EOPNOTSUPP)) { *mode &= ~current_umask(); return 0; @@ -411,29 +409,29 @@ release_acl: return ret; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, - struct page *dpage) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct folio *ifolio, + struct folio *dfolio) { struct posix_acl *default_acl = NULL, *acl = NULL; int error; - error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dfolio); if (error) return error; f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { - error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl, - ipage); + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, + default_acl, ifolio); posix_acl_release(default_acl); } else { inode->i_default_acl = NULL; } if (acl) { if (!error) - error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl, - ipage); + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, + acl, ifolio); posix_acl_release(acl); } else { inode->i_acl = NULL; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 94ebfbfbdc6f..20e87e63c089 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -33,17 +33,17 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); -extern int f2fs_set_acl(struct mnt_idmap *, struct dentry *, +struct posix_acl *f2fs_get_acl(struct inode *, int, bool); +int f2fs_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, - struct page *); +int f2fs_init_acl(struct inode *, struct inode *, struct folio *ifolio, + struct folio *dfolio); #else #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *ipage, struct page *dpage) + struct folio *ifolio, struct folio *dfolio) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8fd3b7f9fb88..300664269eb6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -21,7 +21,7 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3)) static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; @@ -29,36 +29,36 @@ struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason) { - f2fs_build_fault_attr(sbi, 0, 0); + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); if (!end_io) f2fs_flush_merged_writes(sbi); - f2fs_handle_critical_error(sbi, reason, end_io); + f2fs_handle_critical_error(sbi, reason); } /* * We guarantee no failure on the returned page. */ -struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_grab_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page; + struct folio *folio; repeat: - page = f2fs_grab_cache_page(mapping, index, false); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, false); + if (IS_ERR(folio)) { cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true, true); - if (!PageUptodate(page)) - SetPageUptodate(page); - return page; + f2fs_folio_wait_writeback(folio, META, true, true); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + return folio; } -static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, +static struct folio *__get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page; + struct folio *folio; struct f2fs_io_info fio = { .sbi = sbi, .type = META, @@ -74,64 +74,64 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; repeat: - page = f2fs_grab_cache_page(mapping, index, false); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, false); + if (IS_ERR(folio)) { cond_resched(); goto repeat; } - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto out; - fio.page = page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, F2FS_BLKSIZE); - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + folio_lock(folio); + if (unlikely(!is_meta_folio(folio))) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page->index, META); - f2fs_put_page(page, 1); + if (unlikely(!folio_test_uptodate(folio))) { + f2fs_handle_page_eio(sbi, folio, META); + f2fs_folio_put(folio, true); return ERR_PTR(-EIO); } out: - return page; + return folio; } -struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index) { - return __get_meta_page(sbi, index, true); + return __get_meta_folio(sbi, index, true); } -struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_meta_folio_retry(struct f2fs_sb_info *sbi, pgoff_t index) { - struct page *page; + struct folio *folio; int count = 0; retry: - page = __get_meta_page(sbi, index, true); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -EIO && + folio = __get_meta_folio(sbi, index, true); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE); } - return page; + return folio; } /* for POR only */ -struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_tmp_folio(struct f2fs_sb_info *sbi, pgoff_t index) { - return __get_meta_page(sbi, index, false); + return __get_meta_folio(sbi, index, false); } static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, @@ -154,49 +154,47 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, if (unlikely(f2fs_cp_error(sbi))) return exist; - if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return exist; - } + if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) || + (!exist && type == DATA_GENERIC_ENHANCE)) + goto out_err; + if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE) + goto out_handle; + return exist; - if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - dump_stack(); - } +out_err: + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + dump_stack(); +out_handle: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return exist; } -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (time_to_inject(sbi, FAULT_BLKADDR)) - return false; - switch (type) { case META_NAT: break; case META_SIT: if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) - return false; + goto check_only; break; case META_SSA: if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || blkaddr < SM_I(sbi)->ssa_blkaddr)) - return false; + goto check_only; break; case META_CP: if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || blkaddr < __start_cp_addr(sbi))) - return false; + goto check_only; break; case META_POR: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) - return false; + goto check_only; break; case DATA_GENERIC: case DATA_GENERIC_ENHANCE: @@ -213,7 +211,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); dump_stack(); - return false; + goto err; } else { return __is_bitmap_valid(sbi, blkaddr, type); } @@ -221,13 +219,31 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || blkaddr >= MAIN_BLKADDR(sbi))) - return false; + goto err; break; default: BUG(); } return true; +err: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); +check_only: + return false; +} + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY)) + return false; + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); +} + +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); } /* @@ -236,7 +252,6 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) { - struct page *page; block_t blkno = start; struct f2fs_io_info fio = { .sbi = sbi, @@ -255,6 +270,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; @@ -284,18 +300,18 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, BUG(); } - page = f2fs_grab_cache_page(META_MAPPING(sbi), + folio = f2fs_grab_cache_folio(META_MAPPING(sbi), fio.new_blkaddr, false); - if (!page) + if (IS_ERR(folio)) continue; - if (PageUptodate(page)) { - f2fs_put_page(page, 1); + if (folio_test_uptodate(folio)) { + f2fs_folio_put(folio, true); continue; } - fio.page = page; + fio.folio = folio; err = f2fs_submit_page_bio(&fio); - f2fs_put_page(page, err ? 1 : 0); + f2fs_folio_put(folio, err ? true : false); if (!err) f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, @@ -309,65 +325,54 @@ out: void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, unsigned int ra_blocks) { - struct page *page; + struct folio *folio; bool readahead = false; if (ra_blocks == RECOVERY_MIN_RA_BLOCKS) return; - page = find_get_page(META_MAPPING(sbi), index); - if (!page || !PageUptodate(page)) + folio = filemap_get_folio(META_MAPPING(sbi), index); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) readahead = true; - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); if (readahead) f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true); } -static int __f2fs_write_meta_page(struct page *page, +static bool __f2fs_write_meta_folio(struct folio *folio, struct writeback_control *wbc, enum iostat_type io_type) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); - trace_f2fs_writepage(page, META); + trace_f2fs_writepage(folio, META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); - return 0; + folio_unlock(folio); + return true; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) - goto redirty_out; - f2fs_do_write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); - if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); - return 0; + return true; redirty_out: - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) -{ - return __f2fs_write_meta_page(page, wbc, FS_META_IO); + folio_redirty_for_writepage(wbc, folio); + return false; } static int f2fs_write_meta_pages(struct address_space *mapping, @@ -410,9 +415,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, struct folio_batch fbatch; long nwritten = 0; int nr_folios; - struct writeback_control wbc = { - .for_reclaim = 0, - }; + struct writeback_control wbc = {}; struct blk_plug plug; folio_batch_init(&fbatch); @@ -436,7 +439,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, folio_lock(folio); - if (unlikely(folio->mapping != mapping)) { + if (unlikely(!is_meta_folio(folio))) { continue_unlock: folio_unlock(folio); continue; @@ -446,13 +449,12 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(&folio->page, META, - true, true); + f2fs_folio_wait_writeback(folio, META, true, true); if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - if (__f2fs_write_meta_page(&folio->page, &wbc, + if (!__f2fs_write_meta_folio(folio, &wbc, io_type)) { folio_unlock(folio); break; @@ -477,20 +479,19 @@ stop: static bool f2fs_dirty_meta_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, META); + trace_f2fs_set_page_dirty(folio, META); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; } const struct address_space_operations f2fs_meta_aops = { - .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .dirty_folio = f2fs_dirty_meta_folio, .invalidate_folio = f2fs_invalidate_folio, @@ -503,6 +504,7 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, { struct inode_management *im = &sbi->im[type]; struct ino_entry *e = NULL, *new = NULL; + int ret; if (type == FLUSH_INO) { rcu_read_lock(); @@ -515,7 +517,8 @@ retry: new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS, true, NULL); - radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + ret = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + f2fs_bug_on(sbi, ret); spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); @@ -740,26 +743,26 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page; + struct folio *folio; struct f2fs_orphan_block *orphan_blk; - page = f2fs_get_meta_page(sbi, start_blk + i); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, start_blk + i); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } - orphan_blk = (struct f2fs_orphan_block *)page_address(page); + orphan_blk = folio_address(folio); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); err = recover_orphan_inode(sbi, ino); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); goto out; } } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); @@ -776,7 +779,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned int nentries = 0; unsigned short index = 1; unsigned short orphan_blocks; - struct page *page = NULL; + struct folio *folio = NULL; struct ino_entry *orphan = NULL; struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -791,10 +794,9 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in journal block */ list_for_each_entry(orphan, head, list) { - if (!page) { - page = f2fs_grab_meta_page(sbi, start_blk++); - orphan_blk = - (struct f2fs_orphan_block *)page_address(page); + if (!folio) { + folio = f2fs_grab_meta_folio(sbi, start_blk++); + orphan_blk = folio_address(folio); memset(orphan_blk, 0, sizeof(*orphan_blk)); } @@ -809,62 +811,61 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) orphan_blk->blk_addr = cpu_to_le16(index); orphan_blk->blk_count = cpu_to_le16(orphan_blocks); orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); index++; nentries = 0; - page = NULL; + folio = NULL; } } - if (page) { + if (folio) { orphan_blk->blk_addr = cpu_to_le16(index); orphan_blk->blk_count = cpu_to_le16(orphan_blocks); orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } } -static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, - struct f2fs_checkpoint *ckpt) +static __u32 f2fs_checkpoint_chksum(struct f2fs_checkpoint *ckpt) { unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); __u32 chksum; - chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + chksum = f2fs_crc32(ckpt, chksum_ofs); if (chksum_ofs < CP_CHKSUM_OFFSET) { chksum_ofs += sizeof(chksum); - chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, - F2FS_BLKSIZE - chksum_ofs); + chksum = f2fs_chksum(chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); } return chksum; } static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, - struct f2fs_checkpoint **cp_block, struct page **cp_page, + struct f2fs_checkpoint **cp_block, struct folio **cp_folio, unsigned long long *version) { size_t crc_offset = 0; __u32 crc; - *cp_page = f2fs_get_meta_page(sbi, cp_addr); - if (IS_ERR(*cp_page)) - return PTR_ERR(*cp_page); + *cp_folio = f2fs_get_meta_folio(sbi, cp_addr); + if (IS_ERR(*cp_folio)) + return PTR_ERR(*cp_folio); - *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); + *cp_block = folio_address(*cp_folio); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); if (crc_offset < CP_MIN_CHKSUM_OFFSET || crc_offset > CP_CHKSUM_OFFSET) { - f2fs_put_page(*cp_page, 1); + f2fs_folio_put(*cp_folio, true); f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = f2fs_checkpoint_chksum(sbi, *cp_block); + crc = f2fs_checkpoint_chksum(*cp_block); if (crc != cur_cp_crc(*cp_block)) { - f2fs_put_page(*cp_page, 1); + f2fs_folio_put(*cp_folio, true); f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -873,23 +874,23 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, return 0; } -static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, +static struct folio *validate_checkpoint(struct f2fs_sb_info *sbi, block_t cp_addr, unsigned long long *version) { - struct page *cp_page_1 = NULL, *cp_page_2 = NULL; + struct folio *cp_folio_1 = NULL, *cp_folio_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, - &cp_page_1, version); + &cp_folio_1, version); if (err) return NULL; cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); - if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { + if (cp_blocks > BLKS_PER_SEG(sbi) || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; @@ -898,19 +899,19 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, - &cp_page_2, version); + &cp_folio_2, version); if (err) goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { *version = cur_version; - f2fs_put_page(cp_page_2, 1); - return cp_page_1; + f2fs_folio_put(cp_folio_2, true); + return cp_folio_1; } - f2fs_put_page(cp_page_2, 1); + f2fs_folio_put(cp_folio_2, true); invalid_cp: - f2fs_put_page(cp_page_1, 1); + f2fs_folio_put(cp_folio_1, true); return NULL; } @@ -918,7 +919,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *cp_block; struct f2fs_super_block *fsb = sbi->raw_super; - struct page *cp1, *cp2, *cur_page; + struct folio *cp1, *cp2, *cur_folio; unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; @@ -945,22 +946,22 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) if (cp1 && cp2) { if (ver_after(cp2_version, cp1_version)) - cur_page = cp2; + cur_folio = cp2; else - cur_page = cp1; + cur_folio = cp1; } else if (cp1) { - cur_page = cp1; + cur_folio = cp1; } else if (cp2) { - cur_page = cp2; + cur_folio = cp2; } else { err = -EFSCORRUPTED; goto fail_no_cp; } - cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + cp_block = folio_address(cur_folio); memcpy(sbi->ckpt, cp_block, blk_size); - if (cur_page == cp1) + if (cur_folio == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; @@ -975,30 +976,30 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) goto done; cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); - if (cur_page == cp2) + if (cur_folio == cp2) cp_blk_no += BIT(le32_to_cpu(fsb->log_blocks_per_seg)); for (i = 1; i < cp_blks; i++) { void *sit_bitmap_ptr; unsigned char *ckpt = (unsigned char *)sbi->ckpt; - cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); - if (IS_ERR(cur_page)) { - err = PTR_ERR(cur_page); + cur_folio = f2fs_get_meta_folio(sbi, cp_blk_no + i); + if (IS_ERR(cur_folio)) { + err = PTR_ERR(cur_folio); goto free_fail_no_cp; } - sit_bitmap_ptr = page_address(cur_page); + sit_bitmap_ptr = folio_address(cur_folio); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); - f2fs_put_page(cur_page, 1); + f2fs_folio_put(cur_folio, true); } done: - f2fs_put_page(cp1, 1); - f2fs_put_page(cp2, 1); + f2fs_folio_put(cp1, true); + f2fs_folio_put(cp2, true); return 0; free_fail_no_cp: - f2fs_put_page(cp1, 1); - f2fs_put_page(cp2, 1); + f2fs_folio_put(cp1, true); + f2fs_folio_put(cp2, true); fail_no_cp: kvfree(sbi->ckpt); return err; @@ -1044,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); } void f2fs_remove_dirty_inode(struct inode *inode) @@ -1170,6 +1171,11 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); } static bool __need_flush_quota(struct f2fs_sb_info *sbi) @@ -1203,7 +1209,6 @@ static int block_operations(struct f2fs_sb_info *sbi) struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_reclaim = 0, }; int err = 0, cnt = 0; @@ -1215,7 +1220,7 @@ static int block_operations(struct f2fs_sb_info *sbi) retry_flush_quotas: f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { - int locked; + bool need_lock = sbi->umount_lock_holder != current; if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); @@ -1224,11 +1229,13 @@ retry_flush_quotas: } f2fs_unlock_all(sbi); - /* only failed during mount/umount/freeze/quotactl */ - locked = down_read_trylock(&sbi->sb->s_umount); - f2fs_quota_sync(sbi->sb, -1); - if (locked) + /* don't grab s_umount lock during mount/umount/remount/freeze/quotactl */ + if (!need_lock) { + f2fs_do_quota_sync(sbi->sb, -1); + } else if (down_read_trylock(&sbi->sb->s_umount)) { + f2fs_do_quota_sync(sbi->sb, -1); up_read(&sbi->sb->s_umount); + } cond_resched(); goto retry_flush_quotas; } @@ -1311,7 +1318,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) f2fs_submit_merged_write(sbi, DATA); prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - io_schedule_timeout(DEFAULT_IO_TIMEOUT); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } @@ -1322,21 +1329,13 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long flags; - if (cpc->reason & CP_UMOUNT) { - if (le32_to_cpu(ckpt->cp_pack_total_block_count) + - NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) { - clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Disable nat_bits due to no space"); - } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) && - f2fs_nat_bitmap_enabled(sbi)) { - f2fs_enable_nat_bits(sbi); - set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Rebuild and enable nat_bits"); - } - } - spin_lock_irqsave(&sbi->cp_lock, flags); + if ((cpc->reason & CP_UMOUNT) && + le32_to_cpu(ckpt->cp_pack_total_block_count) > + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) + disable_nat_bits(sbi, false); + if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); else @@ -1393,35 +1392,31 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) static void commit_checkpoint(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { - struct writeback_control wbc = { - .for_reclaim = 0, - }; + struct writeback_control wbc = {}; /* - * filemap_get_folios_tag and lock_page again will take + * filemap_get_folios_tag and folio_lock again will take * some extra time. Therefore, f2fs_update_meta_pages and * f2fs_sync_meta_pages are combined in this function. */ - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); - int err; + struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); - f2fs_wait_on_page_writeback(page, META, true, true); + memcpy(folio_address(folio), src, PAGE_SIZE); - memcpy(page_address(page), src, PAGE_SIZE); - - set_page_dirty(page); - if (unlikely(!clear_page_dirty_for_io(page))) + folio_mark_dirty(folio); + if (unlikely(!folio_clear_dirty_for_io(folio))) f2fs_bug_on(sbi, 1); /* writeout cp pack 2 page */ - err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); - if (unlikely(err && f2fs_cp_error(sbi))) { - f2fs_put_page(page, 1); - return; + if (unlikely(!__f2fs_write_meta_folio(folio, &wbc, FS_CP_META_IO))) { + if (f2fs_cp_error(sbi)) { + f2fs_folio_put(folio, true); + return; + } + f2fs_bug_on(sbi, true); } - f2fs_bug_on(sbi, err); - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); /* submit checkpoint (with barrier if NOBARRIER is not set) */ f2fs_submit_merged_write(sbi, META_FLUSH); @@ -1447,6 +1442,34 @@ u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi) return get_sectors_written(sbi->sb->s_bdev); } +static inline void stat_cp_time(struct cp_control *cpc, enum cp_time type) +{ + cpc->stats.times[type] = ktime_get(); +} + +static inline void check_cp_time(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + unsigned long long sb_diff, cur_diff; + enum cp_time ct; + + sb_diff = (u64)ktime_ms_delta(sbi->cp_stats.times[CP_TIME_END], + sbi->cp_stats.times[CP_TIME_START]); + cur_diff = (u64)ktime_ms_delta(cpc->stats.times[CP_TIME_END], + cpc->stats.times[CP_TIME_START]); + + if (cur_diff > sb_diff) { + sbi->cp_stats = cpc->stats; + if (cur_diff < CP_LONG_LATENCY_THRESHOLD) + return; + + f2fs_warn(sbi, "checkpoint was blocked for %llu ms", cur_diff); + for (ct = CP_TIME_START; ct < CP_TIME_MAX - 1; ct++) + f2fs_warn(sbi, "Step#%d: %llu ms", ct, + (u64)ktime_ms_delta(cpc->stats.times[ct + 1], + cpc->stats.times[ct])); + } +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1464,6 +1487,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + stat_cp_time(cpc, CP_TIME_SYNC_META); + /* start to update checkpoint, cp ver is already updated previously */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); @@ -1511,7 +1536,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_checkpoint_chksum(sbi, ckpt); + crc32 = f2fs_checkpoint_chksum(ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1519,18 +1544,17 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_next_addr(sbi); /* write nat bits */ - if ((cpc->reason & CP_UMOUNT) && - is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) { + if (enabled_nat_bits(sbi, cpc)) { __u64 cp_ver = cur_cp_version(ckpt); block_t blk; cp_ver |= ((__u64)crc32 << 32); *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); - blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; + blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + - (i << F2FS_BLKSIZE_BITS), blk + i); + F2FS_BLK_TO_BYTES(i), blk + i); } /* write out checkpoint buffer at block 0 */ @@ -1559,27 +1583,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - percpu_counter_set(&sbi->rf_node_block_count, 0); - /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + stat_cp_time(cpc, CP_TIME_SYNC_CP_META); + /* Wait for all dirty meta pages to be submitted for IO */ f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); + stat_cp_time(cpc, CP_TIME_WAIT_DIRTY_META); /* wait for previous submitted meta pages writeback */ f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); + stat_cp_time(cpc, CP_TIME_WAIT_CP_DATA); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); if (err) return err; + stat_cp_time(cpc, CP_TIME_FLUSH_DEVICE); /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); + stat_cp_time(cpc, CP_TIME_WAIT_LAST_CP); /* * invalidate intermediate page cache borrowed from meta inode which are @@ -1587,8 +1612,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) */ if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || f2fs_sb_has_compression(sbi)) - invalidate_mapping_pages(META_MAPPING(sbi), - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + f2fs_bug_on(sbi, + invalidate_inode_pages2_range(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1)); f2fs_release_ino_entry(sbi, false); @@ -1623,6 +1649,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + stat_cp_time(cpc, CP_TIME_START); + if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) return -EROFS; @@ -1634,6 +1662,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason != CP_RESIZE) f2fs_down_write(&sbi->cp_global_sem); + stat_cp_time(cpc, CP_TIME_LOCK); + if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) @@ -1643,13 +1673,15 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; } - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_START_BLOCK_OPS); err = block_operations(sbi); if (err) goto out; - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); + stat_cp_time(cpc, CP_TIME_OP_LOCK); + + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_FINISH_BLOCK_OPS); f2fs_flush_merged_writes(sbi); @@ -1688,6 +1720,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_sit_entries(sbi, cpc); + stat_cp_time(cpc, CP_TIME_FLUSH_META); + /* save inmem log status */ f2fs_save_inmem_curseg(sbi); @@ -1701,16 +1735,19 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } f2fs_restore_inmem_curseg(sbi); + f2fs_reinit_atgc_curseg(sbi); + stat_inc_cp_count(sbi); stop: unblock_operations(sbi); - stat_inc_cp_count(sbi->stat_info); + stat_cp_time(cpc, CP_TIME_END); + check_cp_time(sbi, cpc); if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_FINISH_CHECKPOINT); out: if (cpc->reason != CP_RESIZE) f2fs_up_write(&sbi->cp_global_sem); @@ -1730,9 +1767,9 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) im->ino_num = 0; } - sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + sbi->max_orphans = (BLKS_PER_SEG(sbi) - F2FS_CP_PACKS - NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) * - F2FS_ORPHANS_PER_BLOCK; + F2FS_ORPHANS_PER_BLOCK; } int __init f2fs_create_checkpoint_caches(void) @@ -1787,6 +1824,7 @@ static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi) llist_for_each_entry_safe(req, next, dispatch_list, llnode) { diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time); req->ret = ret; + req->delta_time = diff; complete(&req->wait); sum_diff += diff; @@ -1848,7 +1886,8 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) struct cp_control cpc; cpc.reason = __get_cp_reason(sbi); - if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) { + if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC || + sbi->umount_lock_holder == current) { int ret; f2fs_down_write(&sbi->gc_lock); @@ -1881,6 +1920,12 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) else flush_remained_ckpt_reqs(sbi, &req); + if (unlikely(req.delta_time >= CP_LONG_LATENCY_THRESHOLD)) { + f2fs_warn_ratelimited(sbi, + "blocked on checkpoint for %u ms", cprc->peak_time); + dump_stack(); + } + return req.ret; } @@ -1929,7 +1974,7 @@ void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi) /* Let's wait for the previous dispatched checkpoint. */ while (atomic_read(&cprc->queued_ckpt)) - io_schedule_timeout(DEFAULT_IO_TIMEOUT); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 0f7df9c11af3..7b68bf22989d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -23,20 +23,18 @@ static struct kmem_cache *cic_entry_slab; static struct kmem_cache *dic_entry_slab; -static void *page_array_alloc(struct inode *inode, int nr) +static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (likely(size <= sbi->page_array_slab_size)) return f2fs_kmem_cache_alloc(sbi->page_array_slab, - GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); + GFP_F2FS_ZERO, false, sbi); return f2fs_kzalloc(sbi, size, GFP_NOFS); } -static void page_array_free(struct inode *inode, void *pages, int nr) +static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int size = sizeof(struct page *) * nr; if (!pages) @@ -73,28 +71,28 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) return cc->cluster_idx << cc->log_cluster_size; } -bool f2fs_is_compressed_page(struct page *page) +bool f2fs_is_compressed_page(struct folio *folio) { - if (!PagePrivate(page)) - return false; - if (!page_private(page)) + if (!folio->private) return false; - if (page_private_nonpointer(page)) + if (folio_test_f2fs_nonpointer(folio)) return false; - f2fs_bug_on(F2FS_M_SB(page->mapping), - *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + f2fs_bug_on(F2FS_F_SB(folio), + *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC); return true; } static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - attach_page_private(page, (void *)data); + struct folio *folio = page_folio(page); + + folio_attach_private(folio, (void *)data); /* i_crypto_info and iv index */ - page->index = index; - page->mapping = inode->i_mapping; + folio->index = index; + folio->mapping = inode->i_mapping; } static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) @@ -122,7 +120,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) } static void f2fs_put_rpages_wbc(struct compress_ctx *cc, - struct writeback_control *wbc, bool redirty, int unlock) + struct writeback_control *wbc, bool redirty, bool unlock) { unsigned int i; @@ -135,9 +133,11 @@ static void f2fs_put_rpages_wbc(struct compress_ctx *cc, } } -struct page *f2fs_compress_control_page(struct page *page) +struct folio *f2fs_compress_control_folio(struct folio *folio) { - return ((struct compress_io_ctx *)page_private(page))->rpages[0]; + struct compress_io_ctx *ctx = folio->private; + + return page_folio(ctx->rpages[0]); } int f2fs_init_compress_ctx(struct compress_ctx *cc) @@ -145,13 +145,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) if (cc->rpages) return 0; - cc->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size); return cc->rpages ? 0 : -ENOMEM; } void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { - page_array_free(cc->inode, cc->rpages, cc->cluster_size); + page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; @@ -160,24 +160,24 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->cluster_idx = NULL_CLUSTER; } -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) { unsigned int cluster_ofs; - if (!f2fs_cluster_can_merge_page(cc, page->index)) + if (!f2fs_cluster_can_merge_page(cc, folio->index)) f2fs_bug_on(F2FS_I_SB(cc->inode), 1); - cluster_ofs = offset_in_cluster(cc, page->index); - cc->rpages[cluster_ofs] = page; + cluster_ofs = offset_in_cluster(cc, folio->index); + cc->rpages[cluster_ofs] = folio_page(folio, 0); cc->nr_rpages++; - cc->cluster_idx = cluster_idx(cc, page->index); + cc->cluster_idx = cluster_idx(cc, folio->index); } #ifdef CONFIG_F2FS_FS_LZO static int lzo_init_compress_ctx(struct compress_ctx *cc) { - cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), - LZO1X_MEM_COMPRESS, GFP_NOFS); + cc->private = f2fs_vmalloc(F2FS_I_SB(cc->inode), + LZO1X_MEM_COMPRESS); if (!cc->private) return -ENOMEM; @@ -187,7 +187,7 @@ static int lzo_init_compress_ctx(struct compress_ctx *cc) static void lzo_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; } @@ -198,8 +198,8 @@ static int lzo_compress_pages(struct compress_ctx *cc) ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo compress failed, ret:%d", ret); return -EIO; } return 0; @@ -212,17 +212,15 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(dic->sbi, + "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(dic->sbi, + "lzo invalid rlen:%zu, expected:%lu", + dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -246,7 +244,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) size = LZ4HC_MEM_COMPRESS; #endif - cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), size, GFP_NOFS); + cc->private = f2fs_vmalloc(F2FS_I_SB(cc->inode), size); if (!cc->private) return -ENOMEM; @@ -261,7 +259,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) static void lz4_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; } @@ -294,16 +292,15 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(dic->sbi, + "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, ret, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(dic->sbi, + "lz4 invalid ret:%d, expected:%lu", + ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -343,17 +340,15 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) params = zstd_get_params(level, cc->rlen); workspace_size = zstd_cstream_workspace_bound(¶ms.cParams); - workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), - workspace_size, GFP_NOFS); + workspace = f2fs_vmalloc(F2FS_I_SB(cc->inode), workspace_size); if (!workspace) return -ENOMEM; stream = zstd_init_cstream(¶ms, 0, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__); - kvfree(workspace); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_init_cstream failed", __func__); + vfree(workspace); return -EIO; } @@ -366,7 +361,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) static void zstd_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; cc->private2 = NULL; } @@ -390,16 +385,16 @@ static int zstd_compress_pages(struct compress_ctx *cc) ret = zstd_compress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_compress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } ret = zstd_end_stream(stream, &outbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_end_stream returned %d", __func__, zstd_get_error_code(ret)); return -EIO; } @@ -425,17 +420,15 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) workspace_size = zstd_dstream_workspace_bound(max_window_size); - workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), - workspace_size, GFP_NOFS); + workspace = f2fs_vmalloc(dic->sbi, workspace_size); if (!workspace) return -ENOMEM; stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, - __func__); - kvfree(workspace); + f2fs_err_ratelimited(dic->sbi, + "%s zstd_init_dstream failed", __func__); + vfree(workspace); return -EIO; } @@ -447,7 +440,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) { - kvfree(dic->private); + vfree(dic->private); dic->private = NULL; dic->private2 = NULL; } @@ -469,16 +462,15 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(dic->sbi, + "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(dic->sbi, + "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -512,8 +504,8 @@ static int lzorle_compress_pages(struct compress_ctx *cc) ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo-rle compress failed, ret:%d", ret); return -EIO; } return 0; @@ -597,11 +589,14 @@ static struct page *f2fs_compress_alloc_page(void) static void f2fs_compress_free_page(struct page *page) { + struct folio *folio; + if (!page) return; - detach_page_private(page); - page->mapping = NULL; - unlock_page(page); + folio = page_folio(page); + folio_detach_private(folio); + folio->mapping = NULL; + folio_unlock(folio); mempool_free(page, compress_page_pool); } @@ -623,6 +618,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count) static int f2fs_compress_pages(struct compress_ctx *cc) { + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct f2fs_inode_info *fi = F2FS_I(cc->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -643,19 +639,14 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); cc->valid_nr_cpages = cc->nr_cpages; - cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); + cc->cpages = page_array_alloc(sbi, cc->nr_cpages); if (!cc->cpages) { ret = -ENOMEM; goto destroy_compress_ctx; } - for (i = 0; i < cc->nr_cpages; i++) { + for (i = 0; i < cc->nr_cpages; i++) cc->cpages[i] = f2fs_compress_alloc_page(); - if (!cc->cpages[i]) { - ret = -ENOMEM; - goto out_free_cpages; - } - } cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size); if (!cc->rbuf) { @@ -683,8 +674,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cbuf->clen = cpu_to_le32(cc->clen); if (fi->i_compress_flag & BIT(COMPRESS_CHKSUM)) - chksum = f2fs_crc32(F2FS_I_SB(cc->inode), - cc->cbuf->cdata, cc->clen); + chksum = f2fs_crc32(cc->cbuf->cdata, cc->clen); cc->cbuf->chksum = cpu_to_le32(chksum); for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) @@ -723,7 +713,7 @@ out_free_cpages: if (cc->cpages[i]) f2fs_compress_free_page(cc->cpages[i]); } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; destroy_compress_ctx: if (cops->destroy_compress_ctx) @@ -741,7 +731,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_sb_info *sbi = dic->sbi; struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; @@ -769,10 +759,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) ret = -EFSCORRUPTED; /* Avoid f2fs_commit_super in irq context */ - if (!in_task) - f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION); - else - f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); + f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); goto out_release; } @@ -780,14 +767,14 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) if (!ret && (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))) { u32 provided = le32_to_cpu(dic->cbuf->chksum); - u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); + u32 calculated = f2fs_crc32(dic->cbuf->cdata, dic->clen); if (provided != calculated) { if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT); - printk_ratelimited( - "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x", - KERN_INFO, sbi->sb->s_id, dic->inode->i_ino, + f2fs_info_ratelimited(sbi, + "checksum invalid, nid = %lu, %x vs %x", + dic->inode->i_ino, provided, calculated); } set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -803,25 +790,27 @@ out_end_io: f2fs_decompress_end_io(dic, ret, in_task); } +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr); + /* * This is called when a page of a compressed cluster has been read from disk * (or failed to be read from disk). It checks whether this page was the last * page being waited on in the cluster, and if so, it decompresses the cluster * (or in the case of a failure, cleans up without actually decompressing). */ -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); - struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct decompress_io_ctx *dic = folio->private; + struct f2fs_sb_info *sbi = dic->sbi; dec_page_count(sbi, F2FS_RD_DATA); if (failed) WRITE_ONCE(dic->failed, true); else if (blkaddr && in_task) - f2fs_cache_compressed_page(sbi, page, + f2fs_cache_compressed_page(sbi, folio, dic->inode->i_ino, blkaddr); if (atomic_dec_and_test(&dic->remaining_pages)) @@ -855,7 +844,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate) { - unsigned long pgidx = pages[index]->index; + unsigned long pgidx = page_folio(pages[index])->index; int i = uptodate ? 0 : 1; /* @@ -869,9 +858,11 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, return false; for (; i < cc->cluster_size; i++) { - if (pages[index + i]->index != pgidx + i) + struct folio *folio = page_folio(pages[index + i]); + + if (folio->index != pgidx + i) return false; - if (uptodate && !PageUptodate(pages[index + i])) + if (uptodate && !folio_test_uptodate(folio)) return false; } @@ -890,7 +881,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ - if (page->index >= nr_pages) + if (page_folio(page)->index >= nr_pages) return true; } return false; @@ -898,14 +889,15 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { +#ifdef CONFIG_F2FS_CHECK_FS struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; - bool compressed = dn->data_blkaddr == COMPRESS_ADDR; int cluster_end = 0; + unsigned int count; int i; char *reason = ""; - if (!compressed) + if (dn->data_blkaddr != COMPRESS_ADDR) return false; /* [..., COMPR_ADDR, ...] */ @@ -914,8 +906,8 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) goto out; } - for (i = 1; i < cluster_size; i++) { - block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + for (i = 1, count = 1; i < cluster_size; i++, count++) { + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); /* [COMPR_ADDR, ..., COMPR_ADDR] */ @@ -934,19 +926,42 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) goto out; } } + + f2fs_bug_on(F2FS_I_SB(dn->inode), count != cluster_size && + !is_inode_flag_set(dn->inode, FI_COMPRESS_RELEASED)); + return false; out: f2fs_warn(sbi, "access invalid cluster, ino:%lu, nid:%u, ofs_in_node:%u, reason:%s", dn->inode->i_ino, dn->nid, dn->ofs_in_node, reason); set_sbi_flag(sbi, SBI_NEED_FSCK); return true; +#else + return false; +#endif } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr) +static int __f2fs_get_cluster_blocks(struct inode *inode, + struct dnode_of_data *dn) { - struct dnode_of_data dn; unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, i; + + for (i = 0, count = 0; i < cluster_size; i++) { + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, + dn->ofs_in_node + i); + + if (__is_valid_data_blkaddr(blkaddr)) + count++; + } + + return count; +} + +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) +{ + struct dnode_of_data dn; unsigned int start_idx = cluster_idx << F2FS_I(inode)->i_log_cluster_size; int ret; @@ -961,31 +976,16 @@ static int __f2fs_cluster_blocks(struct inode *inode, if (f2fs_sanity_check_cluster(&dn)) { ret = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_CLUSTER); goto fail; } if (dn.data_blkaddr == COMPRESS_ADDR) { - int i; - - ret = 1; - for (i = 1; i < cluster_size; i++) { - block_t blkaddr; - - blkaddr = data_blkaddr(dn.inode, - dn.node_page, dn.ofs_in_node + i); - if (compr) { - if (__is_valid_data_blkaddr(blkaddr)) - ret++; - } else { - if (blkaddr != NULL_ADDR) - ret++; - } - } - - f2fs_bug_on(F2FS_I_SB(inode), - !compr && ret != cluster_size && - !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)); + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) + ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -995,15 +995,33 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } -/* return # of valid blocks in compressed cluster */ +/* return whether cluster is compressed one or not */ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) @@ -1029,13 +1047,40 @@ static void set_cluster_writeback(struct compress_ctx *cc) } } +static void cancel_cluster_writeback(struct compress_ctx *cc, + struct compress_io_ctx *cic, int submitted) +{ + int i; + + /* Wait for submitted IOs. */ + if (submitted > 1) { + f2fs_submit_merged_write(F2FS_I_SB(cc->inode), DATA); + while (atomic_read(&cic->pending_pages) != + (cc->valid_nr_cpages - submitted + 1)) + f2fs_io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); + } + + /* Cancel writeback and stay locked. */ + for (i = 0; i < cc->cluster_size; i++) { + if (i < submitted) { + inode_inc_dirty_pages(cc->inode); + lock_page(cc->rpages[i]); + } + clear_page_private_gcing(cc->rpages[i]); + if (folio_test_writeback(page_folio(cc->rpages[i]))) + end_page_writeback(cc->rpages[i]); + } +} + static void set_cluster_dirty(struct compress_ctx *cc) { int i; for (i = 0; i < cc->cluster_size; i++) - if (cc->rpages[i]) + if (cc->rpages[i]) { set_page_dirty(cc->rpages[i]); + set_page_private_gcing(cc->rpages[i]); + } } static int prepare_compress_overwrite(struct compress_ctx *cc, @@ -1043,7 +1088,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, { struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct address_space *mapping = cc->inode->i_mapping; - struct page *page; + struct folio *folio; sector_t last_block_in_bio; fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; pgoff_t start_idx = start_idx_of_cluster(cc); @@ -1058,26 +1103,26 @@ retry: if (ret) return ret; - /* keep page reference to avoid page reclaim */ + /* keep folio reference to avoid page reclaim */ for (i = 0; i < cc->cluster_size; i++) { - page = f2fs_pagecache_get_page(mapping, start_idx + i, - fgp_flag, GFP_NOFS); - if (!page) { - ret = -ENOMEM; + folio = f2fs_filemap_get_folio(mapping, start_idx + i, + fgp_flag, GFP_NOFS); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); goto unlock_pages; } - if (PageUptodate(page)) - f2fs_put_page(page, 1); + if (folio_test_uptodate(folio)) + f2fs_folio_put(folio, true); else - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, folio); } if (!f2fs_cluster_is_empty(cc)) { struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false, true); + &last_block_in_bio, NULL, true); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc, true); if (ret) @@ -1093,16 +1138,17 @@ retry: for (i = 0; i < cc->cluster_size; i++) { f2fs_bug_on(sbi, cc->rpages[i]); - page = find_lock_page(mapping, start_idx + i); - if (!page) { - /* page can be truncated */ + folio = filemap_lock_folio(mapping, start_idx + i); + if (IS_ERR(folio)) { + /* folio could be truncated */ goto release_and_retry; } - f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); + f2fs_folio_wait_writeback(folio, DATA, true, true); + f2fs_compress_ctx_add_page(cc, folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { + f2fs_handle_page_eio(sbi, folio, DATA); release_and_retry: f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i + 1); @@ -1150,12 +1196,13 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, .cluster_size = F2FS_I(inode)->i_cluster_size, .rpages = fsdata, }; - bool first_index = (index == cc.rpages[0]->index); + struct folio *folio = page_folio(cc.rpages[0]); + bool first_index = (index == folio->index); if (copied) set_cluster_dirty(&cc); - f2fs_put_rpages_wbc(&cc, NULL, false, 1); + f2fs_put_rpages_wbc(&cc, NULL, false, true); f2fs_destroy_compress_ctx(&cc, false); return first_index; @@ -1165,9 +1212,11 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) { void *fsdata = NULL; struct page *pagep; + struct page **rpages; int log_cluster_size = F2FS_I(inode)->i_log_cluster_size; pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) << log_cluster_size; + int i; int err; err = f2fs_is_compressed_cluster(inode, start_idx); @@ -1188,26 +1237,30 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) if (err <= 0) return err; - if (err > 0) { - struct page **rpages = fsdata; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int i; + rpages = fsdata; - for (i = cluster_size - 1; i >= 0; i--) { - loff_t start = rpages[i]->index << PAGE_SHIFT; + for (i = (1 << log_cluster_size) - 1; i >= 0; i--) { + struct folio *folio = page_folio(rpages[i]); + loff_t start = (loff_t)folio->index << PAGE_SHIFT; + loff_t offset = from > start ? from - start : 0; - if (from <= start) { - zero_user_segment(rpages[i], 0, PAGE_SIZE); - } else { - zero_user_segment(rpages[i], from - start, - PAGE_SIZE); - break; - } - } + folio_zero_segment(folio, offset, folio_size(folio)); - f2fs_compress_write_end(inode, fsdata, start_idx, true); + if (from >= start) + break; } - return 0; + + f2fs_compress_write_end(inode, fsdata, start_idx, true); + + err = filemap_write_and_wait_range(inode->i_mapping, + round_down(from, 1 << log_cluster_size << PAGE_SHIFT), + LLONG_MAX); + if (err) + return err; + + truncate_pagecache(inode, from); + + return f2fs_do_truncate_blocks(inode, round_up(from, PAGE_SIZE), lock); } static int f2fs_write_compressed_pages(struct compress_ctx *cc, @@ -1228,12 +1281,12 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = 0, .io_type = io_type, .io_wbc = wbc, .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? 1 : 0, }; + struct folio *folio; struct dnode_of_data dn; struct node_info ni; struct compress_io_ctx *cic; @@ -1245,7 +1298,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(cc->rpages[0]->mapping, -EIO); + mapping_set_error(inode->i_mapping, -EIO); goto out_free; } @@ -1267,12 +1320,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_unlock_op; for (i = 0; i < cc->cluster_size; i++) { - if (data_blkaddr(dn.inode, dn.node_page, + if (data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i) == NULL_ADDR) goto out_put_dnode; } - psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + folio = page_folio(cc->rpages[last_index]); + psize = folio_next_pos(folio); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) @@ -1287,7 +1341,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; atomic_set(&cic->pending_pages, cc->valid_nr_cpages); - cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + cic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!cic->rpages) goto out_put_cic; @@ -1295,10 +1349,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, cic); + page_folio(cc->rpages[i + 1])->index, cic); fio.compressed_page = cc->cpages[i]; - fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, + fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i + 1); /* wait for GCed page writeback via META_MAPPING */ @@ -1330,7 +1384,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (blkaddr == COMPRESS_ADDR) fio.compr_blocks++; if (__is_valid_data_blkaddr(blkaddr)) - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); goto unlock_continue; } @@ -1340,7 +1394,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } goto unlock_continue; @@ -1354,11 +1408,20 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, fio.compressed_page = cc->cpages[i - 1]; cc->cpages[i - 1] = NULL; + fio.submitted = 0; f2fs_outplace_write_data(&dn, &fio); + if (unlikely(!fio.submitted)) { + cancel_cluster_writeback(cc, cic, i); + + /* To call fscrypt_finalize_bounce_page */ + i = cc->valid_nr_cpages; + *submitted = 0; + goto out_destroy_crypt; + } (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); - unlock_page(fio.page); + folio_unlock(fio.folio); } if (fio.compr_blocks) @@ -1367,8 +1430,6 @@ unlock_continue: add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); - if (cc->cluster_idx == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); if (quota_inode) @@ -1382,16 +1443,19 @@ unlock_continue: spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: - page_array_free(cc->inode, cic->rpages, cc->cluster_size); + page_array_free(sbi, cic->rpages, cc->cluster_size); - for (--i; i >= 0; i--) + for (--i; i >= 0; i--) { + if (!cc->cpages[i]) + continue; fscrypt_finalize_bounce_page(&cc->cpages[i]); + } out_put_cic: kmem_cache_free(cic_entry_slab, cic); out_put_dnode: @@ -1406,24 +1470,26 @@ out_free: f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); + page_array_free(sbi, cc->cpages, cc->nr_cpages); cc->cpages = NULL; return -EAGAIN; } -void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) { + struct page *page = &folio->page; struct f2fs_sb_info *sbi = bio->bi_private; - struct compress_io_ctx *cic = - (struct compress_io_ctx *)page_private(page); + struct compress_io_ctx *cic = folio->private; + enum count_type type = WB_DATA_TYPE(folio, + f2fs_is_compressed_page(folio)); int i; - if (unlikely(bio->bi_status)) + if (unlikely(bio->bi_status != BLK_STS_OK)) mapping_set_error(cic->inode->i_mapping, -EIO); f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; @@ -1434,17 +1500,19 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) end_page_writeback(cic->rpages[i]); } - page_array_free(cic->inode, cic->rpages, cic->nr_rpages); + page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1459,58 +1527,68 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { + struct folio *folio; + if (!cc->rpages[i]) continue; + folio = page_folio(cc->rpages[i]); retry_write: - lock_page(cc->rpages[i]); + folio_lock(folio); - if (cc->rpages[i]->mapping != mapping) { + if (folio->mapping != mapping) { continue_unlock: - unlock_page(cc->rpages[i]); + folio_unlock(folio); continue; } - if (!PageDirty(cc->rpages[i])) + if (!folio_test_dirty(folio)) goto continue_unlock; - if (PageWriteback(cc->rpages[i])) { + if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; - f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); } - if (!clear_page_dirty_for_io(cc->rpages[i])) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + submitted = 0; + ret = f2fs_write_single_data_page(folio, &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(cc->rpages[i]); + if (ret == 1) { ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + goto out; + f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -1527,7 +1605,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, add_compr_block_stat(cc->inode, cc->cluster_size); goto write; } else if (err) { - f2fs_put_rpages_wbc(cc, wbc, true, 1); + f2fs_put_rpages_wbc(cc, wbc, true, true); goto destroy_out; } @@ -1541,7 +1619,7 @@ write: f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted); err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); - f2fs_put_rpages_wbc(cc, wbc, false, 0); + f2fs_put_rpages_wbc(cc, wbc, false, false); destroy_out: f2fs_destroy_compress_ctx(cc, false); return err; @@ -1556,14 +1634,13 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; int i; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return 0; - dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); + dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size); if (!dic->tpages) return -ENOMEM; @@ -1574,8 +1651,6 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, } dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) - return -ENOMEM; } dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); @@ -1595,10 +1670,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, bool bypass_destroy_callback, bool pre_alloc) { - const struct f2fs_compress_ops *cops = - f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; + const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; - if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) return; if (!bypass_destroy_callback && cops->destroy_decompress_ctx) @@ -1625,7 +1699,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) if (!dic) return ERR_PTR(-ENOMEM); - dic->rpages = page_array_alloc(cc->inode, cc->cluster_size); + dic->rpages = page_array_alloc(sbi, cc->cluster_size); if (!dic->rpages) { kmem_cache_free(dic_entry_slab, dic); return ERR_PTR(-ENOMEM); @@ -1633,6 +1707,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; + dic->sbi = sbi; + dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; atomic_set(&dic->remaining_pages, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; @@ -1646,7 +1722,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->rpages[i] = cc->rpages[i]; dic->nr_rpages = cc->cluster_size; - dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); + dic->cpages = page_array_alloc(sbi, dic->nr_cpages); if (!dic->cpages) { ret = -ENOMEM; goto out_free; @@ -1656,11 +1732,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) struct page *page; page = f2fs_compress_alloc_page(); - if (!page) { - ret = -ENOMEM; - goto out_free; - } - f2fs_set_compressed_page(page, cc->inode, start_idx + i + 1, dic); dic->cpages[i] = page; @@ -1681,6 +1752,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, bool bypass_destroy_callback) { int i; + /* use sbi in dic to avoid UFA of dic->inode*/ + struct f2fs_sb_info *sbi = dic->sbi; f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); @@ -1692,7 +1765,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->tpages[i]); } - page_array_free(dic->inode, dic->tpages, dic->cluster_size); + page_array_free(sbi, dic->tpages, dic->cluster_size); } if (dic->cpages) { @@ -1701,10 +1774,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, continue; f2fs_compress_free_page(dic->cpages[i]); } - page_array_free(dic->inode, dic->cpages, dic->nr_cpages); + page_array_free(sbi, dic->cpages, dic->nr_cpages); } - page_array_free(dic->inode, dic->rpages, dic->nr_rpages); + page_array_free(sbi, dic->rpages, dic->nr_rpages); kmem_cache_free(dic_entry_slab, dic); } @@ -1723,8 +1796,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) f2fs_free_dic(dic, false); } else { INIT_WORK(&dic->free_work, f2fs_late_free_dic); - queue_work(F2FS_I_SB(dic->inode)->post_read_wq, - &dic->free_work); + queue_work(dic->sbi->post_read_wq, &dic->free_work); } } } @@ -1795,14 +1867,13 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, } /* - * Put a reference to a compressed page's decompress_io_ctx. + * Put a reference to a compressed folio's decompress_io_ctx. * - * This is called when the page is no longer needed and can be freed. + * This is called when the folio is no longer needed and can be freed. */ -void f2fs_put_page_dic(struct page *page, bool in_task) +void f2fs_put_folio_dic(struct folio *folio, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct decompress_io_ctx *dic = folio->private; f2fs_put_dic(dic, in_task); } @@ -1811,16 +1882,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_folio, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; - block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { - block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; @@ -1842,17 +1915,18 @@ struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) return sbi->compress_inode->i_mapping; } -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { if (!sbi->compress_inode) return; - invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr); + invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr) +static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, + struct folio *folio, nid_t ino, block_t blkaddr) { - struct page *cpage; + struct folio *cfolio; int ret; if (!test_opt(sbi, COMPRESS_CACHE)) @@ -1864,53 +1938,49 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, if (!f2fs_available_free_memory(sbi, COMPRESS_PAGE)) return; - cpage = find_get_page(COMPRESS_MAPPING(sbi), blkaddr); - if (cpage) { - f2fs_put_page(cpage, 0); + cfolio = filemap_get_folio(COMPRESS_MAPPING(sbi), blkaddr); + if (!IS_ERR(cfolio)) { + f2fs_folio_put(cfolio, false); return; } - cpage = alloc_page(__GFP_NOWARN | __GFP_IO); - if (!cpage) + cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0, NULL); + if (!cfolio) return; - ret = add_to_page_cache_lru(cpage, COMPRESS_MAPPING(sbi), + ret = filemap_add_folio(COMPRESS_MAPPING(sbi), cfolio, blkaddr, GFP_NOFS); if (ret) { - f2fs_put_page(cpage, 0); + f2fs_folio_put(cfolio, false); return; } - set_page_private_data(cpage, ino); + folio_set_f2fs_data(cfolio, ino); - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) - goto out; - - memcpy(page_address(cpage), page_address(page), PAGE_SIZE); - SetPageUptodate(cpage); -out: - f2fs_put_page(cpage, 1); + memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE); + folio_mark_uptodate(cfolio); + f2fs_folio_put(cfolio, true); } -bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, +bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr) { - struct page *cpage; + struct folio *cfolio; bool hitted = false; if (!test_opt(sbi, COMPRESS_CACHE)) return false; - cpage = f2fs_pagecache_get_page(COMPRESS_MAPPING(sbi), + cfolio = f2fs_filemap_get_folio(COMPRESS_MAPPING(sbi), blkaddr, FGP_LOCK | FGP_NOWAIT, GFP_NOFS); - if (cpage) { - if (PageUptodate(cpage)) { + if (!IS_ERR(cfolio)) { + if (folio_test_uptodate(cfolio)) { atomic_inc(&sbi->compress_page_hit); - memcpy(page_address(page), - page_address(cpage), PAGE_SIZE); + memcpy(folio_address(folio), + folio_address(cfolio), folio_size(folio)); hitted = true; } - f2fs_put_page(cpage, 1); + f2fs_folio_put(cfolio, true); } return hitted; @@ -1944,12 +2014,12 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) continue; } - if (ino != get_page_private_data(&folio->page)) { + if (ino != folio_get_f2fs_data(folio)) { folio_unlock(folio); continue; } - generic_error_remove_page(mapping, &folio->page); + generic_error_remove_folio(mapping, folio); folio_unlock(folio); } folio_batch_release(&fbatch); @@ -1988,7 +2058,7 @@ void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; - char slab_name[32]; + char slab_name[35]; if (!f2fs_sb_has_compression(sbi)) return 0; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5882afe71d82..c30e69392a62 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/sched/mm.h> #include <linux/mpage.h> #include <linux/writeback.h> @@ -48,14 +47,14 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(const struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode; struct f2fs_sb_info *sbi; - if (!mapping) - return false; + if (fscrypt_is_bounce_folio(folio)) + return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -65,17 +64,15 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || - page_private_gcing(page)) + folio_test_f2fs_gcing(folio)) return true; return false; } -static enum count_type __read_io_type(struct page *page) +static enum count_type __read_io_type(struct folio *folio) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio->mapping; if (mapping) { struct inode *inode = mapping->host; @@ -139,27 +136,22 @@ struct bio_post_read_ctx { */ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; struct bio_post_read_ctx *ctx = bio->bi_private; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) { + if (f2fs_is_compressed_page(folio)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(page, true, 0, + f2fs_end_read_compressed_page(folio, true, 0, in_task); - f2fs_put_page_dic(page, in_task); + f2fs_put_folio_dic(folio, in_task); continue; } - if (bio->bi_status) - ClearPageUptodate(page); - else - SetPageUptodate(page); - dec_page_count(F2FS_P_SB(page), __read_io_type(page)); - unlock_page(page); + dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); + folio_end_read(folio, bio->bi_status == BLK_STS_OK); } if (ctx) @@ -189,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work) * as those were handled separately by f2fs_end_read_compressed_page(). */ if (may_have_compressed_pages) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (!f2fs_is_compressed_page(page) && - !fsverity_verify_page(page)) { + if (!f2fs_is_compressed_page(folio) && + !fsverity_verify_page(&folio->page)) { bio->bi_status = BLK_STS_IOERR; break; } @@ -241,16 +232,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; bool all_compressed = true; block_t blkaddr = ctx->fs_blkaddr; - bio_for_each_segment_all(bv, ctx->bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, ctx->bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, false, blkaddr, + if (f2fs_is_compressed_page(folio)) + f2fs_end_read_compressed_page(folio, false, blkaddr, in_task); else all_compressed = false; @@ -288,9 +278,9 @@ static void f2fs_post_read_work(struct work_struct *work) static void f2fs_read_end_io(struct bio *bio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio)); struct bio_post_read_ctx *ctx; - bool intask = in_task(); + bool intask = in_task() && !irqs_disabled(); iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; @@ -298,7 +288,7 @@ static void f2fs_read_end_io(struct bio *bio) if (time_to_inject(sbi, FAULT_READ_IO)) bio->bi_status = BLK_STS_IOERR; - if (bio->bi_status) { + if (bio->bi_status != BLK_STS_OK) { f2fs_finish_read_bio(bio, intask); return; } @@ -327,8 +317,7 @@ static void f2fs_read_end_io(struct bio *bio) static void f2fs_write_end_io(struct bio *bio) { struct f2fs_sb_info *sbi; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; iostat_update_and_unbind_ctx(bio); sbi = bio->bi_private; @@ -336,45 +325,41 @@ static void f2fs_write_end_io(struct bio *bio) if (time_to_inject(sbi, FAULT_WRITE_IO)) bio->bi_status = BLK_STS_IOERR; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + enum count_type type; - if (page_private_dummy(page)) { - clear_page_private_dummy(page); - unlock_page(page); - mempool_free(page, sbi->write_io_dummy); + if (fscrypt_is_bounce_folio(folio)) { + struct folio *io_folio = folio; - if (unlikely(bio->bi_status)) - f2fs_stop_checkpoint(sbi, true, - STOP_CP_REASON_WRITE_FAIL); - continue; + folio = fscrypt_pagecache_folio(io_folio); + fscrypt_free_bounce_page(&io_folio->page); } - fscrypt_finalize_bounce_page(&page); - #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_is_compressed_page(page)) { - f2fs_compress_write_end_io(bio, page); + if (f2fs_is_compressed_page(folio)) { + f2fs_compress_write_end_io(bio, folio); continue; } #endif - if (unlikely(bio->bi_status)) { - mapping_set_error(page->mapping, -EIO); + type = WB_DATA_TYPE(folio, false); + + if (unlikely(bio->bi_status != BLK_STS_OK)) { + mapping_set_error(folio->mapping, -EIO); if (type == F2FS_WB_CP_DATA) f2fs_stop_checkpoint(sbi, true, STOP_CP_REASON_WRITE_FAIL); } - f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page->index != nid_of_node(page)); + f2fs_bug_on(sbi, is_node_folio(folio) && + folio->index != nid_of_node(folio)); dec_page_count(sbi, type); - if (f2fs_in_warm_node_list(sbi, page)) - f2fs_del_fsync_node_entry(sbi, page); - clear_page_private_gcing(page); - end_page_writeback(page); + if (f2fs_in_warm_node_list(sbi, folio)) + f2fs_del_fsync_node_entry(sbi, folio); + folio_clear_f2fs_gcing(folio); + folio_end_writeback(folio); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && wq_has_sleeper(&sbi->cp_wait)) @@ -457,6 +442,11 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) op_flags |= REQ_META; if (BIT(fio->temp) & fua_flag) op_flags |= REQ_FUA; + + if (fio->type == DATA && + F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) + op_flags |= REQ_PRIO; + return op_flags; } @@ -478,6 +468,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, + fio->type, fio->temp); } iostat_alloc_and_bind_ctx(sbi, bio, NULL); @@ -524,51 +516,10 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, submit_bio(bio); } -static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio) -{ - unsigned int start = - (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi); - - if (start == 0) - return; - - /* fill dummy pages */ - for (; start < F2FS_IO_SIZE(sbi); start++) { - struct page *page = - mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_NOFAIL); - f2fs_bug_on(sbi, !page); - - lock_page(page); - - zero_user_segment(page, 0, PAGE_SIZE); - set_page_private_dummy(page); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) - f2fs_bug_on(sbi, 1); - } -} - static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); - - if (type == DATA || type == NODE) { - if (f2fs_lfs_mode(sbi) && current->plug) - blk_finish_plug(current->plug); - - if (F2FS_IO_ALIGNED(sbi)) { - f2fs_align_write_bio(sbi, bio); - /* - * In the NODE case, we lose next block address chain. - * So, we need to do checkpoint in f2fs_sync_file. - */ - if (type == NODE) - set_sbi_flag(sbi, SBI_NEED_CP); - } - } - trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); @@ -592,34 +543,33 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) } static bool __has_merged_page(struct bio *bio, struct inode *inode, - struct page *page, nid_t ino) + struct folio *folio, nid_t ino) { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; if (!bio) return false; - if (!inode && !page && !ino) + if (!inode && !folio && !ino) return true; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *target = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *target = fi.folio; - if (fscrypt_is_bounce_page(target)) { - target = fscrypt_pagecache_page(target); + if (fscrypt_is_bounce_folio(target)) { + target = fscrypt_pagecache_folio(target); if (IS_ERR(target)) continue; } if (f2fs_is_compressed_page(target)) { - target = f2fs_compress_control_page(target); + target = f2fs_compress_control_folio(target); if (IS_ERR(target)) continue; } if (inode && inode == target->mapping->host) return true; - if (page && page == target) + if (folio && folio == target) return true; if (ino && ino == ino_of_node(target)) return true; @@ -643,17 +593,20 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) return -ENOMEM; for (j = HOT; j < n; j++) { - init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); - sbi->write_io[i][j].sbi = sbi; - sbi->write_io[i][j].bio = NULL; - spin_lock_init(&sbi->write_io[i][j].io_lock); - INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); - INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); + struct f2fs_bio_info *io = &sbi->write_io[i][j]; + + init_f2fs_rwsem(&io->io_rwsem); + io->sbi = sbi; + io->bio = NULL; + io->last_block_in_bio = 0; + spin_lock_init(&io->io_lock); + INIT_LIST_HEAD(&io->io_list); + INIT_LIST_HEAD(&io->bio_list); + init_f2fs_rwsem(&io->bio_list_lock); #ifdef CONFIG_BLK_DEV_ZONED - init_completion(&sbi->write_io[i][j].zone_wait); - sbi->write_io[i][j].zone_pending_bio = NULL; - sbi->write_io[i][j].bi_private = NULL; + init_completion(&io->zone_wait); + io->zone_pending_bio = NULL; + io->bi_private = NULL; #endif } } @@ -685,7 +638,7 @@ unlock_out: } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type, bool force) { enum temp_type temp; @@ -697,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); - ret = __has_merged_page(io->bio, inode, page, ino); + ret = __has_merged_page(io->bio, inode, folio, ino); f2fs_up_read(&io->io_rwsem); } if (ret) @@ -715,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, page, ino, type, false); + __submit_merged_write_cond(sbi, inode, folio, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -735,34 +688,29 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *fio_folio = fio->folio; + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio_folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? - META_GENERIC : DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(data_folio, fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - return -EFAULT; - } + f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host, + fio_folio->index, fio, GFP_NOIO); + bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0); if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -785,9 +733,11 @@ static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, static bool io_type_is_mergeable(struct f2fs_bio_info *io, struct f2fs_io_info *fio) { + blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA); + if (io->fio.op != fio->op) return false; - return io->fio.op_flags == fio->op_flags; + return (io->fio.op_flags & mask) == (fio->op_flags & mask); } static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, @@ -796,23 +746,13 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { - if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) { - unsigned int filled_blocks = - F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size); - unsigned int io_size = F2FS_IO_SIZE(sbi); - unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt; - - /* IOs in bio is aligned and left space of vectors is not enough */ - if (!(filled_blocks % io_size) && left_vecs < io_size) - return false; - } if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) return false; return io_type_is_mergeable(io, fio); } static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, - struct page *page, enum temp_type temp) + struct folio *folio, enum temp_type temp) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct bio_entry *be; @@ -821,8 +761,7 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, be->bio = bio; bio_get(bio); - if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) - f2fs_bug_on(sbi, 1); + bio_add_folio_nofail(bio, folio, folio_size(folio), 0); f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); @@ -836,8 +775,9 @@ static void del_bio_entry(struct bio_entry *be) } static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, - struct page *page) + struct folio *folio) { + struct folio *fio_folio = fio->folio; struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; @@ -859,10 +799,9 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, *fio->last_block, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, - fio->page->mapping->host, - fio->page->index, fio) && - bio_add_page(*bio, page, PAGE_SIZE, 0) == - PAGE_SIZE) { + fio_folio->mapping->host, + fio_folio->index, fio) && + bio_add_folio(*bio, folio, folio_size(folio), 0)) { ret = 0; break; } @@ -884,13 +823,13 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, } void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, - struct bio **bio, struct page *page) + struct bio **bio, struct folio *folio) { enum temp_type temp; bool found = false; struct bio *target = bio ? *bio : NULL; - f2fs_bug_on(sbi, !target && !page); + f2fs_bug_on(sbi, !target && !folio); for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; @@ -906,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - page, 0); + folio, 0); if (found) break; } @@ -923,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - page, 0); + folio, 0); if (found) { target = be->bio; del_bio_entry(be); @@ -944,16 +883,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio->folio; + struct folio *folio = fio->folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - } - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(data_folio, fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -961,19 +899,19 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, + folio->index, fio, GFP_NOIO); - add_bio_entry(fio->sbi, bio, page, fio->temp); + add_bio_entry(fio->sbi, bio, data_folio, fio->temp); } else { - if (add_ipu_page(fio, &bio, page)) + if (add_ipu_page(fio, &bio, data_folio)) goto alloc_new; } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -984,6 +922,7 @@ alloc_new: #ifdef CONFIG_BLK_DEV_ZONED static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) { + struct block_device *bdev = sbi->sb->s_bdev; int devi = 0; if (f2fs_is_multi_device(sbi)) { @@ -994,8 +933,9 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) return false; } blkaddr -= FDEV(devi).start_blk; + bdev = FDEV(devi).bdev; } - return bdev_zoned_model(FDEV(devi).bdev) == BLK_ZONED_HM && + return bdev_is_zoned(bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } @@ -1006,12 +946,13 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; - struct page *bio_page; + struct folio *bio_folio; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); f2fs_down_write(&io->io_rwsem); - +next: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) { wait_for_completion_io(&io->zone_wait); @@ -1021,7 +962,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) } #endif -next: if (fio->in_list) { spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { @@ -1037,53 +977,44 @@ next: verify_fio_blkaddr(fio); if (fio->encrypted_page) - bio_page = fio->encrypted_page; + bio_folio = page_folio(fio->encrypted_page); else if (fio->compressed_page) - bio_page = fio->compressed_page; + bio_folio = page_folio(fio->compressed_page); else - bio_page = fio->page; + bio_folio = fio->folio; /* set submitted = true as a return value */ fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_folio, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || - !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, - bio_page->index, fio))) + !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio), + bio_folio->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - if (F2FS_IO_ALIGNED(sbi) && - (fio->type == DATA || fio->type == NODE) && - fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = 1; - goto skip; - } io->bio = __bio_alloc(fio, BIO_MAX_VECS); - f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, - bio_page->index, fio, GFP_NOIO); + f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio), + bio_folio->index, fio, GFP_NOIO); io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) { __submit_merged_bio(io); goto alloc_new; } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio->folio, + folio_size(fio->folio)); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_page_write(fio->page, fio); -skip: - if (fio->in_list) - goto next; -out: + trace_f2fs_submit_folio_write(fio->folio, fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1096,6 +1027,9 @@ out: __submit_merged_bio(io); } #endif + if (fio->in_list) + goto next; +out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); @@ -1116,8 +1050,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages), REQ_OP_READ | op_flag, for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset); - if (!bio) - return ERR_PTR(-ENOMEM); bio->bi_iter.bi_sector = sector; f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); bio->bi_end_io = f2fs_read_end_io; @@ -1151,7 +1083,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static int f2fs_submit_page_read(struct inode *inode, struct page *page, +static void f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { @@ -1159,55 +1091,44 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); - if (IS_ERR(bio)) - return PTR_ERR(bio); + folio->index, for_write); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - return -EFAULT; - } + if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) + f2fs_bug_on(sbi, 1); + inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); f2fs_submit_read_bio(sbi, bio, DATA); - return 0; } -static void __set_data_blkaddr(struct dnode_of_data *dn) +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - struct f2fs_node *rn = F2FS_NODE(dn->node_page); - __le32 *addr_array; - int base = 0; - - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); + __le32 *addr = get_dnode_addr(dn->inode, dn->node_folio); - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + dn->data_blkaddr = blkaddr; + addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* * Lock ordering for the change of data block address: * ->data_page - * ->node_page + * ->node_folio * update block addresses in the node page */ -void f2fs_set_data_blkaddr(struct dnode_of_data *dn) +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); - __set_data_blkaddr(dn); - if (set_page_dirty(dn->node_page)) + f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true); + __set_data_blkaddr(dn, blkaddr); + if (folio_mark_dirty(dn->node_folio)) dn->node_changed = true; } void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - dn->data_blkaddr = blkaddr; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, blkaddr); f2fs_update_read_extent_cache(dn); } @@ -1222,25 +1143,25 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + __set_data_blkaddr(dn, NEW_ADDR); count--; } } - if (set_page_dirty(dn->node_page)) + if (folio_mark_dirty(dn->node_folio)) dn->node_changed = true; return 0; } @@ -1258,7 +1179,7 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn) int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) { - bool need_put = dn->inode_page ? false : true; + bool need_put = dn->inode_folio ? false : true; int err; err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); @@ -1272,26 +1193,23 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write, - pgoff_t *next_pgofs) +struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; - struct page *page; + struct folio *folio; int err; - page = f2fs_grab_cache_page(mapping, index, for_write); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(mapping, index, for_write); + if (IS_ERR(folio)) + return folio; if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } goto got_it; @@ -1317,66 +1235,65 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } got_it: - if (PageUptodate(page)) { - unlock_page(page); - return page; + if (folio_test_uptodate(folio)) { + folio_unlock(folio); + return folio; } /* * A new dentry page is allocated but not able to be written, since its * new inode page couldn't be allocated due to -ENOSPC. * In such the case, its blkaddr can be remained as NEW_ADDR. - * see, f2fs_add_link -> f2fs_get_new_data_page -> + * see, f2fs_add_link -> f2fs_get_new_data_folio -> * f2fs_init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); - return page; + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_unlock(folio); + return folio; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + f2fs_submit_page_read(inode, folio, dn.data_blkaddr, op_flags, for_write); - if (err) - goto put_err; - return page; + return folio; put_err: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; - struct page *page; - - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); - - page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs); - if (IS_ERR(page)) - return page; - - if (PageUptodate(page)) - return page; - - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 0); + struct folio *folio; + + folio = f2fs_filemap_get_folio(mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio)) + goto read; + if (folio_test_uptodate(folio)) + return folio; + f2fs_folio_put(folio, false); + +read: + folio = f2fs_get_read_data_folio(inode, index, 0, false, next_pgofs); + if (IS_ERR(folio)) + return folio; + + if (folio_test_uptodate(folio)) + return folio; + + folio_wait_locked(folio); + if (unlikely(!folio_test_uptodate(folio))) { + f2fs_folio_put(folio, false); return ERR_PTR(-EIO); } - return page; + return folio; } /* @@ -1384,27 +1301,23 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, * Because, the callers, functions in dir.c and GC, should be able to know * whether this page exists or not. */ -struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index, bool for_write) { struct address_space *mapping = inode->i_mapping; - struct page *page; -repeat: - page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); - if (IS_ERR(page)) - return page; + struct folio *folio; + + folio = f2fs_get_read_data_folio(inode, index, 0, for_write, NULL); + if (IS_ERR(folio)) + return folio; /* wait for read completion */ - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; - } - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); + folio_lock(folio); + if (unlikely(folio->mapping != mapping || !folio_test_uptodate(folio))) { + f2fs_folio_put(folio, true); return ERR_PTR(-EIO); } - return page; + return folio; } /* @@ -1413,57 +1326,57 @@ repeat: * * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). - * Note that, ipage is set only by make_empty_dir, and if any error occur, - * ipage should be released by this function. + * Note that, ifolio is set only by make_empty_dir, and if any error occur, + * ifolio should be released by this function. */ -struct page *f2fs_get_new_data_page(struct inode *inode, - struct page *ipage, pgoff_t index, bool new_i_size) +struct folio *f2fs_get_new_data_folio(struct inode *inode, + struct folio *ifolio, pgoff_t index, bool new_i_size) { struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; struct dnode_of_data dn; int err; - page = f2fs_grab_cache_page(mapping, index, true); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, true); + if (IS_ERR(folio)) { /* - * before exiting, we should make sure ipage will be released + * before exiting, we should make sure ifolio will be released * if any error occur. */ - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return ERR_PTR(-ENOMEM); } - set_new_dnode(&dn, inode, ipage, NULL, 0); + set_new_dnode(&dn, inode, ifolio, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } - if (!ipage) + if (!ifolio) f2fs_put_dnode(&dn); - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto got_it; if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); } else { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); - /* if ipage exists, blkaddr should be NEW_ADDR */ - f2fs_bug_on(F2FS_I_SB(inode), ipage); - page = f2fs_get_lock_data_page(inode, index, true); - if (IS_ERR(page)) - return page; + /* if ifolio exists, blkaddr should be NEW_ADDR */ + f2fs_bug_on(F2FS_I_SB(inode), ifolio); + folio = f2fs_get_lock_data_folio(inode, index, true); + if (IS_ERR(folio)) + return folio; } got_it: if (new_i_size && i_size_read(inode) < ((loff_t)(index + 1) << PAGE_SHIFT)) f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - return page; + return folio; } static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) @@ -1484,26 +1397,28 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { - err = inc_valid_block_count(sbi, dn->inode, &count); + err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; - f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - invalidate_mapping_pages(META_MAPPING(sbi), - old_blkaddr, old_blkaddr); - f2fs_invalidate_compress_page(sbi, old_blkaddr); - } + err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr, + &dn->data_blkaddr, &sum, seg_type, NULL); + if (err) + return err; + + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); + f2fs_update_data_blkaddr(dn, dn->data_blkaddr); return 0; } static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag) { + f2fs_down_read(&sbi->cp_enable_rwsem); if (flag == F2FS_GET_BLOCK_PRE_AIO) f2fs_down_read(&sbi->node_change); else @@ -1516,6 +1431,7 @@ static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag) f2fs_up_read(&sbi->node_change); else f2fs_unlock_op(sbi); + f2fs_up_read(&sbi->cp_enable_rwsem); } int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index) @@ -1581,14 +1497,33 @@ static bool f2fs_map_blocks_cached(struct inode *inode, struct f2fs_dev_info *dev = &sbi->devs[bidx]; map->m_bdev = dev->bdev; - map->m_pblk -= dev->start_blk; map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); + map->m_pblk -= dev->start_blk; } else { map->m_bdev = inode->i_sb->s_bdev; } return true; } +static bool map_is_mergeable(struct f2fs_sb_info *sbi, + struct f2fs_map_blocks *map, + block_t blkaddr, int flag, int bidx, + int ofs) +{ + if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) + return false; + if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) + return true; + if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) + return true; + if (flag == F2FS_GET_BLOCK_PRE_DIO) + return true; + if (flag == F2FS_GET_BLOCK_DIO && + map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR) + return true; + return false; +} + /* * f2fs_map_blocks() tries to find or build mapping relationship which * maps continuous logical blocks to physical blocks, and return such @@ -1608,10 +1543,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) unsigned int start_pgofs; int bidx = 0; bool is_hole; + bool lfs_dio_write; if (!maxblocks) return 0; + lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create); + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; @@ -1626,9 +1565,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; + if (flag == F2FS_GET_BLOCK_PRECACHE) + mode = LOOKUP_NODE_RA; + next_dnode: - if (map->m_may_create) + if (map->m_may_create) { + if (f2fs_lfs_mode(sbi)) + f2fs_balance_fs(sbi, true); f2fs_map_lock(sbi, flag); + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1644,7 +1589,7 @@ next_dnode: start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); @@ -1652,13 +1597,13 @@ next_block: if (!is_hole && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto sync_out; } /* use out-place-update for direct IO under LFS mode */ - if (map->m_may_create && - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { + if (map->m_may_create && (is_hole || + (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1691,9 +1636,7 @@ next_block: map->m_flags |= F2FS_MAP_NEW; } else if (is_hole) { if (f2fs_compressed_file(inode) && - f2fs_sanity_check_cluster(&dn) && - (flag != F2FS_GET_BLOCK_FIEMAP || - IS_ENABLED(CONFIG_F2FS_CHECK_FS))) { + f2fs_sanity_check_cluster(&dn)) { err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_CLUSTER); @@ -1713,6 +1656,10 @@ next_block: goto sync_out; } break; + case F2FS_GET_BLOCK_DIO: + if (map->m_next_pgofs) + *map->m_next_pgofs = pgofs + 1; + break; default: /* for defragment case */ if (map->m_next_pgofs) @@ -1731,22 +1678,24 @@ next_block: /* reserved delalloc block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) map->m_flags |= F2FS_MAP_DELALLOC; - map->m_flags |= F2FS_MAP_MAPPED; + /* DIO READ and hole case, should not map the blocks. */ + if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create)) + map->m_flags |= F2FS_MAP_MAPPED; map->m_pblk = blkaddr; map->m_len = 1; if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; - } else if ((map->m_pblk != NEW_ADDR && - blkaddr == (map->m_pblk + ofs)) || - (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || - flag == F2FS_GET_BLOCK_PRE_DIO) { - if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) - goto sync_out; + + if (lfs_dio_write) + map->m_last_pblk = NULL_ADDR; + } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { + if (lfs_dio_write && !f2fs_is_pinned_file(inode)) + map->m_last_pblk = blkaddr; goto sync_out; } @@ -1825,12 +1774,13 @@ sync_out: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_read_extent_cache_range(&dn, - start_pgofs, map->m_pblk + ofs, - map->m_len - ofs); + if (map->m_len > ofs) + f2fs_update_read_extent_cache_range(&dn, + start_pgofs, map->m_pblk + ofs, + map->m_len - ofs); } if (map->m_next_extent) - *map->m_next_extent = pgofs + 1; + *map->m_next_extent = is_hole ? pgofs + 1 : pgofs; } f2fs_put_dnode(&dn); unlock_out: @@ -1869,21 +1819,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) return true; } -static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) -{ - return (bytes >> inode->i_blkbits); -} - -static inline u64 blks_to_bytes(struct inode *inode, u64 blks) -{ - return (blks << inode->i_blkbits); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; struct node_info ni; __u64 phys = 0, len; __u32 flags; @@ -1892,19 +1831,19 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (f2fs_has_inline_xattr(inode)) { int offset; + struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), + inode->i_ino, false); - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), - inode->i_ino, false); - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); @@ -1912,7 +1851,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, phys += offset; len = inline_xattr_size(inode); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; @@ -1926,20 +1865,22 @@ static int f2fs_xattr_fiemap(struct inode *inode, } if (xnid) { - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); - if (!page) - return -ENOMEM; + struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), + xnid, false); + + if (IS_ERR(folio)) + return PTR_ERR(folio); err = f2fs_get_node_info(sbi, xnid, &ni, false); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); len = inode->i_sb->s_blocksize; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); flags = FIEMAP_EXTENT_LAST; } @@ -1952,30 +1893,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, return (err < 0 ? err : 0); } -static loff_t max_inode_blocks(struct inode *inode) -{ - loff_t result = ADDRS_PER_INODE(inode); - loff_t leaf_count = ADDRS_PER_BLOCK(inode); - - /* two direct node blocks */ - result += (leaf_count * 2); - - /* two indirect node blocks */ - leaf_count *= NIDS_PER_BLOCK; - result += (leaf_count * 2); - - /* one double indirect node block */ - leaf_count *= NIDS_PER_BLOCK; - result += leaf_count; - - return result; -} - int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { struct f2fs_map_blocks map; - sector_t start_blk, last_blk; + sector_t start_blk, last_blk, blk_len, max_len; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; @@ -1995,9 +1917,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret) return ret; - inode_lock(inode); + inode_lock_shared(inode); - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; @@ -2017,16 +1939,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (bytes_to_blks(inode, len) == 0) - len = blks_to_bytes(inode, 1); - - start_blk = bytes_to_blks(inode, start); - last_blk = bytes_to_blks(inode, start + len - 1); + start_blk = F2FS_BYTES_TO_BLK(start); + last_blk = F2FS_BYTES_TO_BLK(start + len - 1); + blk_len = last_blk - start_blk + 1; + max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk; next: memset(&map, 0, sizeof(map)); map.m_lblk = start_blk; - map.m_len = bytes_to_blks(inode, len); + map.m_len = blk_len; map.m_next_pgofs = &next_pgofs; map.m_seg_type = NO_CHECK_TYPE; @@ -2043,13 +1964,23 @@ next: if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; - if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, - max_inode_blocks(inode))) + if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes) goto prep_next; flags |= FIEMAP_EXTENT_LAST; } + /* + * current extent may cross boundary of inquiry, increase len to + * requery. + */ + if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) && + map.m_lblk + map.m_len - 1 == last_blk && + blk_len != max_len) { + blk_len = max_len; + goto next; + } + compr_appended = false; /* In a case of compressed cluster, append this to the last extent */ if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || @@ -2081,14 +2012,14 @@ skip_fill: } else if (compr_appended) { unsigned int appended_blks = cluster_size - count_in_cluster + 1; - size += blks_to_bytes(inode, appended_blks); + size += F2FS_BLK_TO_BYTES(appended_blks); start_blk += appended_blks; compr_cluster = false; } else { - logical = blks_to_bytes(inode, start_blk); + logical = F2FS_BLK_TO_BYTES(start_blk); phys = __is_valid_data_blkaddr(map.m_pblk) ? - blks_to_bytes(inode, map.m_pblk) : 0; - size = blks_to_bytes(inode, map.m_len); + F2FS_BLK_TO_BYTES(map.m_pblk) : 0; + size = F2FS_BLK_TO_BYTES(map.m_len); flags = 0; if (compr_cluster) { @@ -2096,13 +2027,13 @@ skip_fill: count_in_cluster += map.m_len; if (count_in_cluster == cluster_size) { compr_cluster = false; - size += blks_to_bytes(inode, 1); + size += F2FS_BLKSIZE; } } else if (map.m_flags & F2FS_MAP_DELALLOC) { flags = FIEMAP_EXTENT_UNWRITTEN; } - start_blk += bytes_to_blks(inode, size); + start_blk += F2FS_BYTES_TO_BLK(size); } prep_next: @@ -2115,37 +2046,43 @@ out: if (ret == 1) ret = 0; - inode_unlock(inode); + inode_unlock_shared(inode); return ret; } static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) - return inode->i_sb->s_maxbytes; + return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } -static int f2fs_read_single_page(struct inode *inode, struct page *page, +static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) +{ + return rac ? REQ_RAHEAD : 0; +} + +static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, sector_t *last_block_in_bio, - bool is_readahead) + struct readahead_control *rac) { struct bio *bio = *bio_ret; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t block_nr; + pgoff_t index = folio->index; int ret = 0; - block_in_file = (sector_t)page_index(page); + block_in_file = (sector_t)index; last_block = block_in_file + nr_pages; - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -2173,26 +2110,24 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, got_it: if ((map->m_flags & F2FS_MAP_MAPPED)) { block_nr = map->m_pblk + block_in_file - map->m_lblk; - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto out; } } else { zero_out: - zero_user_segment(page, 0, PAGE_SIZE); - if (f2fs_need_verity(inode, page->index) && - !fsverity_verify_page(page)) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (f2fs_need_verity(inode, index) && + !fsverity_verify_folio(folio)) { ret = -EIO; goto out; } - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_unlock(folio); goto out; } @@ -2202,21 +2137,15 @@ zero_out: */ if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, *last_block_in_bio, block_nr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } - if (bio == NULL) { + if (bio == NULL) bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index, + f2fs_ra_op_flags(rac), index, false); - if (IS_ERR(bio)) { - ret = PTR_ERR(bio); - bio = NULL; - goto out; - } - } /* * If the page is under writeback, we need to wait for @@ -2224,7 +2153,7 @@ submit_and_realloc: */ f2fs_wait_on_block_writeback(inode, block_nr); - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); @@ -2239,7 +2168,7 @@ out: #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write) + struct readahead_control *rac, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2247,34 +2176,43 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; struct decompress_io_ctx *dic = NULL; struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; + from_dnode = false; + goto out_put_dnode; + } + f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; + struct folio *folio; if (!page) continue; - if ((sector_t)page->index >= last_block_in_file) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - } else if (!PageUptodate(page)) { + + folio = page_folio(page); + if ((sector_t)folio->index >= last_block_in_file) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + } else if (!folio_test_uptodate(folio)) { continue; } - unlock_page(page); + folio_unlock(folio); if (for_write) - put_page(page); + folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } @@ -2294,17 +2232,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (ret) goto out; - if (unlikely(f2fs_cp_error(sbi))) { - ret = -EIO; - goto out_put_dnode; - } f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); skip_reading_dnode: for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i) : ei.blk + i - 1; @@ -2334,44 +2268,38 @@ skip_reading_dnode: } for (i = 0; i < cc->nr_cpages; i++) { - struct page *page = dic->cpages[i]; + struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; - blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i + 1) : ei.blk + i; f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, page, blkaddr)) { - if (atomic_dec_and_test(&dic->remaining_pages)) + if (f2fs_load_compressed_folio(sbi, folio, blkaddr)) { + if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); + break; + } continue; } if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; } - if (!bio) { - bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, - page->index, for_write); - if (IS_ERR(bio)) { - ret = PTR_ERR(bio); - f2fs_decompress_end_io(dic, ret, true); - f2fs_put_dnode(&dn); - *bio_ret = NULL; - return ret; - } - } + if (!bio) + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i, + f2fs_ra_op_flags(rac), + folio->index, for_write); - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); @@ -2409,7 +2337,7 @@ out: * Major change was from block_size == page_size in f2fs by default. */ static int f2fs_mpage_readpages(struct inode *inode, - struct readahead_control *rac, struct page *page) + struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -2426,11 +2354,20 @@ static int f2fs_mpage_readpages(struct inode *inode, .nr_cpages = 0, }; pgoff_t nc_cluster_idx = NULL_CLUSTER; + pgoff_t index; #endif unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; int ret = 0; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + index = rac ? readahead_index(rac) : folio->index; + max_nr_pages = round_up(index + nr_pages, cc.cluster_size) - + round_down(index, cc.cluster_size); + } +#endif + map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; @@ -2442,64 +2379,63 @@ static int f2fs_mpage_readpages(struct inode *inode, for (; nr_pages; nr_pages--) { if (rac) { - page = readahead_page(rac); - prefetchw(&page->flags); + folio = readahead_folio(rac); + prefetchw(&folio->flags); } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_compressed_file(inode)) { - /* there are remained compressed pages, submit them */ - if (!f2fs_cluster_can_merge_page(&cc, page->index)) { - ret = f2fs_read_multi_pages(&cc, &bio, - max_nr_pages, - &last_block_in_bio, - rac != NULL, false); - f2fs_destroy_compress_ctx(&cc, false); - if (ret) - goto set_error_page; - } - if (cc.cluster_idx == NULL_CLUSTER) { - if (nc_cluster_idx == - page->index >> cc.log_cluster_size) { - goto read_single_page; - } - - ret = f2fs_is_compressed_cluster(inode, page->index); - if (ret < 0) - goto set_error_page; - else if (!ret) { - nc_cluster_idx = - page->index >> cc.log_cluster_size; - goto read_single_page; - } - - nc_cluster_idx = NULL_CLUSTER; - } - ret = f2fs_init_compress_ctx(&cc); + index = folio->index; + + if (!f2fs_compressed_file(inode)) + goto read_single_page; + + /* there are remained compressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + rac, false); + f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; + } + if (cc.cluster_idx == NULL_CLUSTER) { + if (nc_cluster_idx == index >> cc.log_cluster_size) + goto read_single_page; - f2fs_compress_ctx_add_page(&cc, page); + ret = f2fs_is_compressed_cluster(inode, index); + if (ret < 0) + goto set_error_page; + else if (!ret) { + nc_cluster_idx = + index >> cc.log_cluster_size; + goto read_single_page; + } - goto next_page; + nc_cluster_idx = NULL_CLUSTER; } + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, folio); + + goto next_page; read_single_page: #endif - ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map, &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: #endif - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_unlock(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION next_page: #endif - if (rac) - put_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { @@ -2508,7 +2444,7 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); } } @@ -2521,22 +2457,21 @@ next_page: static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio->mapping->host; int ret = -EAGAIN; - trace_f2fs_readpage(page, DATA); + trace_f2fs_readpage(folio, DATA); if (!f2fs_is_compress_backend_ready(inode)) { - unlock_page(page); + folio_unlock(folio); return -EOPNOTSUPP; } /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) - ret = f2fs_read_inline_data(inode, page); + ret = f2fs_read_inline_data(inode, folio); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(inode, NULL, page); + ret = f2fs_mpage_readpages(inode, NULL, folio); return ret; } @@ -2558,8 +2493,9 @@ static void f2fs_readahead(struct readahead_control *rac) int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { - struct inode *inode = fio->page->mapping->host; - struct page *mpage, *page; + struct inode *inode = fio_inode(fio); + struct folio *mfolio; + struct page *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) @@ -2567,14 +2503,11 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) page = fio->compressed_page ? fio->compressed_page : fio->page; - /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); - if (fscrypt_inode_uses_inline_crypto(inode)) return 0; retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page), PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ @@ -2587,12 +2520,12 @@ retry_encrypt: return PTR_ERR(fio->encrypted_page); } - mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); - if (mpage) { - if (PageUptodate(mpage)) - memcpy(page_address(mpage), + mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr); + if (!IS_ERR(mfolio)) { + if (folio_test_uptodate(mfolio)) + memcpy(folio_address(mfolio), page_address(fio->encrypted_page), PAGE_SIZE); - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); } return 0; } @@ -2664,7 +2597,12 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) + return true; + /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ + if (f2fs_compressed_file(inode) && + F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER && + is_inode_flag_set(inode, FI_ENABLE_COMPRESS)) return true; /* swap file is migrating in aligned write mode */ @@ -2677,8 +2615,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (fio) { if (page_private_gcing(fio->page)) return true; - if (page_private_dummy(fio->page)) - return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) return true; @@ -2688,7 +2624,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) static inline bool need_inplace_update(struct f2fs_io_info *fio) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); if (f2fs_should_update_outplace(inode, fio)) return false; @@ -2698,28 +2634,28 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct page *page = fio->page; - struct inode *inode = page->mapping->host; + struct folio *folio = fio->folio; + struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + folio_test_f2fs_atomic(folio); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_read_extent_cache_block(inode, page->index, + f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(fio->sbi, - ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE)) return -EFSCORRUPTED; - } ipu_force = true; fio->need_lock = LOCK_DONE; @@ -2730,7 +2666,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; @@ -2738,8 +2674,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { - ClearPageUptodate(page); - clear_page_private_gcing(page); + folio_clear_uptodate(folio); + folio_clear_f2fs_gcing(folio); goto out_writepage; } got_it: @@ -2747,10 +2683,13 @@ got_it: !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); goto out_writepage; } + /* wait for GCed page writeback via META_MAPPING */ + if (fio->meta_gc) + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); + /* * If current allocation needs SSR, * it had better in-place writes for updated data. @@ -2762,7 +2701,7 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -2770,12 +2709,11 @@ got_it: if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - if (PageWriteback(page)) - end_page_writeback(page); + folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(fio->page, IPU); + trace_f2fs_do_write_data_page(folio, IPU); return err; } @@ -2797,17 +2735,17 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); + trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + if (atomic_commit) + folio_clear_f2fs_atomic(folio); out_writepage: f2fs_put_dnode(&dn); out: @@ -2816,7 +2754,7 @@ out: return err; } -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, @@ -2824,12 +2762,12 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, int compr_blocks, bool allow_balance) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); @@ -2841,23 +2779,23 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), .old_blkaddr = NULL_ADDR, - .page = page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, - .post_read = f2fs_post_read_required(inode) ? 1 : 0, + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, + .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, .last_block = last_block, }; - trace_f2fs_writepage(page, DATA); + trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); + mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages for keeping lastest * directory structure. @@ -2875,7 +2813,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || + if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; @@ -2885,14 +2823,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) + if ((folio->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); write: - if (f2fs_is_drop_cache(inode)) - goto out; - /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { /* @@ -2912,16 +2847,10 @@ write: goto done; } - if (!wbc->for_reclaim) - need_balance_fs = true; - else if (has_not_enough_free_secs(sbi, 0, 0)) - goto redirty_out; - else - set_inode_flag(inode, FI_HOT_DATA); - + need_balance_fs = true; err = -EAGAIN; if (f2fs_has_inline_data(inode)) { - err = f2fs_write_inline_data(inode, page); + err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } @@ -2929,6 +2858,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } @@ -2950,17 +2880,10 @@ done: out: inode_dec_dirty_pages(inode); if (err) { - ClearPageUptodate(page); - clear_page_private_gcing(page); - } - - if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); - clear_inode_flag(inode, FI_HOT_DATA); - f2fs_remove_dirty_inode(inode); - submitted = NULL; + folio_clear_uptodate(folio); + folio_clear_f2fs_gcing(folio); } - unlock_page(page); + folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); @@ -2978,41 +2901,19 @@ out: return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). * file_write_and_wait_range() will see EIO error, which is critical * to return value of fsync() followed by atomic_write failure to user. */ - if (!err || wbc->for_reclaim) - return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); + folio_unlock(folio); + if (!err) + return 1; return err; } -static int f2fs_write_data_page(struct page *page, - struct writeback_control *wbc) -{ -#ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = page->mapping->host; - - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) - goto out; - - if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, page->index)) { - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; - } - } -out: -#endif - - return f2fs_write_single_data_page(page, NULL, NULL, NULL, - wbc, FS_DATA_IO, 0, true); -} - /* * This function was copied from write_cache_pages from mm/page-writeback.c. * The major change is making write step of cold data page separately from @@ -3024,7 +2925,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, { int ret = 0; int done = 0, retry = 0; - struct page *pages[F2FS_ONSTACK_PAGES]; + struct page *pages_local[F2FS_ONSTACK_PAGES]; + struct page **pages = pages_local; struct folio_batch fbatch; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; @@ -3048,6 +2950,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, #endif int nr_folios, p, idx; int nr_pages; + unsigned int max_pages = F2FS_ONSTACK_PAGES; pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; @@ -3057,6 +2960,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int submitted = 0; int i; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode) && + 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) { + pages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL); + max_pages = 1 << cc.log_cluster_size; + } +#endif + folio_batch_init(&fbatch); if (get_dirty_pages(mapping->host) <= @@ -3074,10 +2986,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; } - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag = PAGECACHE_TAG_TOWRITE; - else - tag = PAGECACHE_TAG_DIRTY; + tag = wbc_to_tag(wbc); retry: retry = 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) @@ -3102,7 +3011,7 @@ again: add_more: pages[nr_pages] = folio_page(folio, idx); folio_get(folio); - if (++nr_pages == F2FS_ONSTACK_PAGES) { + if (++nr_pages == max_pages) { index = folio->index + idx + 1; folio_batch_release(&fbatch); goto write; @@ -3195,7 +3104,7 @@ continue_unlock: if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; - f2fs_wait_on_page_writeback(&folio->page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); } if (!folio_clear_dirty_for_io(folio)) @@ -3204,15 +3113,14 @@ continue_unlock: #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif - ret = f2fs_write_single_data_page(&folio->page, + submitted = 0; + ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); - if (ret == AOP_WRITEPAGE_ACTIVATE) - folio_unlock(folio); #ifdef CONFIG_F2FS_FS_COMPRESSION result: #endif @@ -3224,20 +3132,19 @@ result: * keep nr_to_write, since vfs uses this to * get # of written pages. */ - if (ret == AOP_WRITEPAGE_ACTIVATE) { + if (ret == 1) { ret = 0; goto next; } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { - f2fs_io_schedule_timeout( - DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout( + DEFAULT_SCHEDULE_TIMEOUT); goto retry_write; } goto next; } - done_index = folio->index + - folio_nr_pages(folio); + done_index = folio_next_index(folio); done = 1; break; } @@ -3285,6 +3192,11 @@ next: if (bio) f2fs_submit_merged_ipu_write(sbi, &bio, NULL); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (pages != pages_local) + kfree(pages); +#endif + return ret; } @@ -3309,6 +3221,19 @@ static inline bool __should_serialize_io(struct inode *inode, return false; } +static inline void account_writeback(struct inode *inode, bool inc) +{ + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return; + + f2fs_down_read(&F2FS_I(inode)->i_sem); + if (inc) + atomic_inc(&F2FS_I(inode)->writeback); + else + atomic_dec(&F2FS_I(inode)->writeback); + f2fs_up_read(&F2FS_I(inode)->i_sem); +} + static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) @@ -3319,10 +3244,6 @@ static int __f2fs_write_data_pages(struct address_space *mapping, int ret; bool locked = false; - /* deal with chardevs and other special file */ - if (!mapping->a_ops->writepage) - return 0; - /* skip writing if there is no dirty page in this inode */ if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; @@ -3358,10 +3279,14 @@ static int __f2fs_write_data_pages(struct address_space *mapping, locked = true; } + account_writeback(inode, true); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + account_writeback(inode, false); + if (locked) mutex_unlock(&sbi->writepages); @@ -3412,13 +3337,13 @@ void f2fs_write_failed(struct inode *inode, loff_t to) } static int prepare_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { - struct inode *inode = page->mapping->host; - pgoff_t index = page->index; + struct inode *inode = folio->mapping->host; + pgoff_t index = folio->index; struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; bool locked = false; int flag = F2FS_GET_BLOCK_PRE_AIO; int err = 0; @@ -3443,29 +3368,34 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, restart: /* check inline_data */ - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto unlock_out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(folio, ifolio); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_page_private_inline(ipage); + folio_set_f2fs_inline(ifolio); goto out; } - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_folio(&dn, folio); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) { + err = -ENODATA; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; @@ -3498,14 +3428,14 @@ static int __find_data_block(struct inode *inode, pgoff_t index, block_t *blk_addr) { struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; int err = 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { @@ -3526,17 +3456,17 @@ static int __reserve_data_block(struct inode *inode, pgoff_t index, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; int err = 0; f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto unlock_out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (!f2fs_lookup_read_extent_cache_block(dn.inode, index, &dn.data_blkaddr)) @@ -3552,12 +3482,12 @@ unlock_out: } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned int len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; - pgoff_t index = page->index; + pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; @@ -3594,13 +3524,15 @@ reserve_block: return 0; } -static int f2fs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, void **fsdata) +static int f2fs_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, struct folio **foliop, + void **fsdata) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page = NULL; - pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; + struct folio *folio; + pgoff_t index = pos >> PAGE_SHIFT; bool need_balance = false; bool use_cow = false; block_t blkaddr = NULL_ADDR; @@ -3616,7 +3548,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: - * lock_page(page #0) -> lock_page(inode_page) + * folio_lock(folio #0) -> folio_lock(inode_page) */ if (index != 0) { err = f2fs_convert_inline_inode(inode); @@ -3627,18 +3559,20 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { int ret; + struct page *page; *fsdata = NULL; if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; - ret = f2fs_prepare_compress_overwrite(inode, pagep, + ret = f2fs_prepare_compress_overwrite(inode, &page, index, fsdata); if (ret < 0) { err = ret; goto fail; } else if (ret) { + *foliop = page_folio(page); return 0; } } @@ -3646,92 +3580,93 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, repeat: /* - * Do not use grab_cache_page_write_begin() to avoid deadlock due to - * wait_for_stable_page. Will wait that below with our IO control. + * Do not use FGP_STABLE to avoid deadlock. + * Will wait that below with our IO control. */ - page = f2fs_pagecache_get_page(mapping, index, - FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); - if (!page) { - err = -ENOMEM; + folio = f2fs_filemap_get_folio(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } /* TODO: cluster can be compressed due to race with .writepage */ - *pagep = page; + *foliop = folio; if (f2fs_is_atomic_file(inode)) - err = prepare_atomic_write_begin(sbi, page, pos, len, + err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else - err = prepare_write_begin(sbi, page, pos, len, + err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) - goto fail; + goto put_folio; if (need_balance && !IS_NOQUOTA(inode) && has_not_enough_free_secs(sbi, 0, 0)) { - unlock_page(page); + folio_unlock(folio); f2fs_balance_fs(sbi, true); - lock_page(page); - if (page->mapping != mapping) { - /* The page got truncated from under us */ - f2fs_put_page(page, 1); + folio_lock(folio); + if (folio->mapping != mapping) { + /* The folio got truncated from under us */ + folio_unlock(folio); + folio_put(folio); goto repeat; } } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_folio_wait_writeback(folio, DATA, false, true); - if (len == PAGE_SIZE || PageUptodate(page)) + if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { - zero_user_segment(page, len, PAGE_SIZE); + folio_zero_segment(folio, len, folio_size(folio)); return 0; } if (blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - goto fail; + goto put_folio; } - err = f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, page, - blkaddr, 0, true); - if (err) - goto fail; - - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + f2fs_submit_page_read(use_cow ? + F2FS_I(inode)->cow_inode : inode, + folio, blkaddr, 0, true); + + folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); goto repeat; } - if (unlikely(!PageUptodate(page))) { + if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; - goto fail; + goto put_folio; } } return 0; +put_folio: + f2fs_folio_put(folio, true); fail: - f2fs_put_page(page, 1); f2fs_write_failed(inode, pos + len); return err; } -static int f2fs_write_end(struct file *file, +static int f2fs_write_end(const struct kiocb *iocb, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; trace_f2fs_write_end(inode, pos, len, copied); @@ -3740,17 +3675,17 @@ static int f2fs_write_end(struct file *file, * should be PAGE_SIZE. Otherwise, we treat it with zero copied and * let generic_perform_write() try to copy data again through copied=0. */ - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { if (unlikely(copied != len)) copied = 0; else - SetPageUptodate(page); + folio_mark_uptodate(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION /* overwrite compressed file */ if (f2fs_compressed_file(inode) && fsdata) { - f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_compress_write_end(inode, fsdata, folio->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (pos + copied > i_size_read(inode) && @@ -3763,7 +3698,10 @@ static int f2fs_write_end(struct file *file, if (!copied) goto unlock_out; - set_page_dirty(page); + folio_mark_dirty(folio); + + if (f2fs_is_atomic_file(inode)) + folio_set_f2fs_atomic(folio); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { @@ -3773,7 +3711,7 @@ static int f2fs_write_end(struct file *file, pos + copied); } unlock_out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } @@ -3797,7 +3735,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) f2fs_remove_dirty_inode(inode); } } - clear_page_private_all(&folio->page); + folio_detach_private(folio); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) @@ -3806,7 +3744,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait) if (folio_test_dirty(folio)) return false; - clear_page_private_all(&folio->page); + folio_detach_private(folio); return true; } @@ -3815,7 +3753,7 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping, { struct inode *inode = mapping->host; - trace_f2fs_set_page_dirty(&folio->page, DATA); + trace_f2fs_set_page_dirty(folio, DATA); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -3900,38 +3838,48 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); + unsigned int end_blk = start_blk + blkcnt - 1; unsigned int secidx = start_blk / blk_per_sec; - unsigned int end_sec = secidx + blkcnt / blk_per_sec; + unsigned int end_sec; int ret = 0; + if (!blkcnt) + return 0; + end_sec = end_blk / blk_per_sec; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); set_inode_flag(inode, FI_OPU_WRITE); - for (; secidx < end_sec; secidx++) { + for (; secidx <= end_sec; secidx++) { + unsigned int blkofs_end = secidx == end_sec ? + end_blk % blk_per_sec : blk_per_sec - 1; + f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + ret = f2fs_allocate_pinning_section(sbi); + if (ret) { + f2fs_up_write(&sbi->pin_sem); + break; + } set_inode_flag(inode, FI_SKIP_WRITES); - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { - struct page *page; + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { + struct folio *folio; unsigned int blkidx = secidx * blk_per_sec + blkofs; - page = f2fs_get_lock_data_page(inode, blkidx, true); - if (IS_ERR(page)) { + folio = f2fs_get_lock_data_folio(inode, blkidx, true); + if (IS_ERR(folio)) { f2fs_up_write(&sbi->pin_sem); - ret = PTR_ERR(page); + ret = PTR_ERR(folio); goto done; } - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } clear_inode_flag(inode, FI_SKIP_WRITES); @@ -3961,15 +3909,14 @@ static int check_swap_activate(struct swap_info_struct *sis, struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - sector_t cur_lblock; - sector_t last_lblock; - sector_t pblock; - sector_t lowest_pblock = -1; - sector_t highest_pblock = 0; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + block_t lowest_pblock = -1; + block_t highest_pblock = 0; int nr_extents = 0; - unsigned long nr_pblocks; + unsigned int nr_pblocks; unsigned int blks_per_sec = BLKS_PER_SEC(sbi); - unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; unsigned int not_aligned = 0; int ret = 0; @@ -3978,7 +3925,7 @@ static int check_swap_activate(struct swap_info_struct *sis, * to be very smart. */ cur_lblock = 0; - last_lblock = bytes_to_blks(inode, i_size_read(inode)); + last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode)); while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; @@ -4007,28 +3954,35 @@ retry: pblock = map.m_pblk; nr_pblocks = map.m_len; - if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask) { + if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || + nr_pblocks % blks_per_sec || + f2fs_is_sequential_zone_area(sbi, pblock)) { + bool last_extent = false; + not_aligned++; nr_pblocks = roundup(nr_pblocks, blks_per_sec); if (cur_lblock + nr_pblocks > sis->max) nr_pblocks -= blks_per_sec; + /* this extent is last one */ if (!nr_pblocks) { - /* this extent is last one */ - nr_pblocks = map.m_len; - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); - goto next; + nr_pblocks = last_lblock - cur_lblock; + last_extent = true; } ret = f2fs_migrate_blocks(inode, cur_lblock, nr_pblocks); - if (ret) + if (ret) { + if (ret == -ENOENT) + ret = -EINVAL; goto out; - goto retry; + } + + if (!last_extent) + goto retry; } -next: + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -4054,10 +4008,9 @@ next: cur_lblock = 1; /* force Empty message */ sis->max = cur_lblock; sis->pages = cur_lblock - 1; - sis->highest_bit = cur_lblock - 1; out: if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)", + f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)", not_aligned, blks_per_sec * F2FS_BLKSIZE); return ret; } @@ -4066,17 +4019,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { - f2fs_err(F2FS_I_SB(inode), - "Swapfile not supported in LFS mode"); + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Swapfile not supported in LFS mode"); return -EINVAL; } @@ -4087,6 +4040,10 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (!f2fs_disable_compressed_file(inode)) return -EINVAL; + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + return ret; + f2fs_precache_extents(inode); ret = check_swap_activate(sis, file, span); @@ -4095,7 +4052,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return ret; } @@ -4121,7 +4078,6 @@ static void f2fs_swap_deactivate(struct file *file) const struct address_space_operations f2fs_dblock_aops = { .read_folio = f2fs_read_data_folio, .readahead = f2fs_readahead, - .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, @@ -4134,13 +4090,13 @@ const struct address_space_operations f2fs_dblock_aops = { .swap_deactivate = f2fs_swap_deactivate, }; -void f2fs_clear_page_cache_dirty_tag(struct page *page) +void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio->mapping; unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - __xa_clear_mark(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irqrestore(&mapping->i_pages, flags); } @@ -4206,22 +4162,33 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { - struct f2fs_map_blocks map = {}; + struct f2fs_map_blocks map = { NULL, }; pgoff_t next_pgofs = 0; int err; - map.m_lblk = bytes_to_blks(inode, offset); - map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; + map.m_lblk = F2FS_BYTES_TO_BLK(offset); + map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); - if (flags & IOMAP_WRITE) + map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); + if (flags & IOMAP_WRITE && iomap->private) { + map.m_last_pblk = (unsigned long)iomap->private; + iomap->private = NULL; + } + + /* + * If the blocks being overwritten are already allocated, + * f2fs_map_lock and f2fs_balance_fs are not necessary. + */ + if ((flags & IOMAP_WRITE) && + !f2fs_overwrite_io(inode, offset, length)) map.m_may_create = true; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); if (err) return err; - iomap->offset = blks_to_bytes(inode, map.m_lblk); + iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk); /* * When inline encryption is enabled, sometimes I/O to an encrypted file @@ -4234,29 +4201,41 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, * We should never see delalloc or compressed extents here based on * prior flushing and checks. */ - if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) - return -EINVAL; if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR)) return -EINVAL; - if (map.m_pblk != NULL_ADDR) { - iomap->length = blks_to_bytes(inode, map.m_len); + if (map.m_flags & F2FS_MAP_MAPPED) { + if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) + return -EINVAL; + + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; - iomap->addr = blks_to_bytes(inode, map.m_pblk); + iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); + + if (flags & IOMAP_WRITE && map.m_last_pblk) + iomap->private = (void *)map.m_last_pblk; } else { if (flags & IOMAP_WRITE) return -ENOTBLK; - iomap->length = blks_to_bytes(inode, next_pgofs) - - iomap->offset; - iomap->type = IOMAP_HOLE; + + if (map.m_pblk == NULL_ADDR) { + iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) - + iomap->offset; + iomap->type = IOMAP_HOLE; + } else if (map.m_pblk == NEW_ADDR) { + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); + iomap->type = IOMAP_UNWRITTEN; + } else { + f2fs_bug_on(F2FS_I_SB(inode), 1); + } iomap->addr = IOMAP_NULL_ADDR; } if (map.m_flags & F2FS_MAP_NEW) iomap->flags |= IOMAP_F_NEW; - if ((inode->i_state & I_DIRTY_DATASYNC) || + if ((inode_state_read_once(inode) & I_DIRTY_DATASYNC) || offset + length > i_size_read(inode)) iomap->flags |= IOMAP_F_DIRTY; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 61c35b59126e..032683835569 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); +static DEFINE_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -41,7 +41,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) total_vblocks = 0; blks_per_sec = CAP_BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -60,6 +60,70 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) } #ifdef CONFIG_DEBUG_FS +static void update_multidevice_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_dev_stats *dev_stats = si->dev_stats; + int i, j; + + if (!f2fs_is_multi_device(sbi)) + return; + + memset(dev_stats, 0, sizeof(struct f2fs_dev_stats) * sbi->s_ndevs); + for (i = 0; i < sbi->s_ndevs; i++) { + unsigned int start_segno, end_segno; + block_t start_blk, end_blk; + + if (i == 0) { + start_blk = MAIN_BLKADDR(sbi); + end_blk = FDEV(i).end_blk + 1 - SEG0_BLKADDR(sbi); + } else { + start_blk = FDEV(i).start_blk; + end_blk = FDEV(i).end_blk + 1; + } + + start_segno = GET_SEGNO(sbi, start_blk); + end_segno = GET_SEGNO(sbi, end_blk); + + for (j = start_segno; j < end_segno; j++) { + unsigned int seg_blks, sec_blks; + + seg_blks = get_seg_entry(sbi, j)->valid_blocks; + + /* update segment stats */ + if (is_curseg(sbi, j)) + dev_stats[i].devstats[0][DEVSTAT_INUSE]++; + else if (seg_blks == BLKS_PER_SEG(sbi)) + dev_stats[i].devstats[0][DEVSTAT_FULL]++; + else if (seg_blks != 0) + dev_stats[i].devstats[0][DEVSTAT_DIRTY]++; + else if (!test_bit(j, FREE_I(sbi)->free_segmap)) + dev_stats[i].devstats[0][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[0][DEVSTAT_PREFREE]++; + + if (!__is_large_section(sbi) || + (j % SEGS_PER_SEC(sbi)) != 0) + continue; + + sec_blks = get_sec_entry(sbi, j)->valid_blocks; + + /* update section stats */ + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j))) + dev_stats[i].devstats[1][DEVSTAT_INUSE]++; + else if (sec_blks == BLKS_PER_SEC(sbi)) + dev_stats[i].devstats[1][DEVSTAT_FULL]++; + else if (sec_blks != 0) + dev_stats[i].devstats[1][DEVSTAT_DIRTY]++; + else if (!test_bit(GET_SEC_FROM_SEG(sbi, j), + FREE_I(sbi)->free_secmap)) + dev_stats[i].devstats[1][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[1][DEVSTAT_PREFREE]++; + } + } +} + static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -100,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndonate_files = sbi->donate_files; si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->aw_cnt = atomic_read(&sbi->atomic_files); @@ -135,7 +200,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->cur_ckpt_time = sbi->cprc_info.cur_time; si->peak_ckpt_time = sbi->cprc_info.peak_time; spin_unlock(&sbi->cprc_info.stat_lock); - si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count); si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); @@ -176,17 +241,17 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; - si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; - si->util_valid = (int)(written_block_count(sbi) >> - sbi->log_blocks_per_seg) + si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; si->util_invalid = 50 - si->util_free - si->util_valid; for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); + si->blkoff[i] = curseg->next_blkoff; si->curseg[i] = curseg->segno; si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); @@ -208,13 +273,18 @@ static void update_general_status(struct f2fs_sb_info *sbi) if (!blks) continue; - if (blks == sbi->blocks_per_seg) + if (blks == BLKS_PER_SEG(sbi)) si->full_seg[type]++; else si->dirty_seg[type]++; si->valid_blks[type] += blks; } + update_multidevice_stats(sbi); + + for (i = 0; i < MAX_CALL_TYPE; i++) + si->cp_call_count[i] = atomic_read(&sbi->cp_call_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -273,7 +343,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks); si->base_mem += NM_I(sbi)->nat_blocks * f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; @@ -370,9 +440,8 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_for_each_entry(si, &f2fs_stat_list, stat_list) { struct f2fs_sb_info *sbi = si->sbi; @@ -433,60 +502,70 @@ static int stat_show(struct seq_file *s, void *v) si->compr_inode, si->compr_blocks); seq_printf(s, " - Swapfile Inode: %u\n", si->swapfile_inode); + seq_printf(s, " - Donate Inode: %u\n", + si->ndonate_files); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); - seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n", - "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); - seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " TYPE %8s %8s %8s %8s %10s %10s %10s\n", + "blkoff", "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); + seq_printf(s, " - COLD data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_COLD_DATA], si->curseg[CURSEG_COLD_DATA], si->cursec[CURSEG_COLD_DATA], si->curzone[CURSEG_COLD_DATA], si->dirty_seg[CURSEG_COLD_DATA], si->full_seg[CURSEG_COLD_DATA], si->valid_blks[CURSEG_COLD_DATA]); - seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - WARM data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_WARM_DATA], si->curseg[CURSEG_WARM_DATA], si->cursec[CURSEG_WARM_DATA], si->curzone[CURSEG_WARM_DATA], si->dirty_seg[CURSEG_WARM_DATA], si->full_seg[CURSEG_WARM_DATA], si->valid_blks[CURSEG_WARM_DATA]); - seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - HOT data: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_HOT_DATA], si->curseg[CURSEG_HOT_DATA], si->cursec[CURSEG_HOT_DATA], si->curzone[CURSEG_HOT_DATA], si->dirty_seg[CURSEG_HOT_DATA], si->full_seg[CURSEG_HOT_DATA], si->valid_blks[CURSEG_HOT_DATA]); - seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - Dir dnode: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_HOT_NODE], si->curseg[CURSEG_HOT_NODE], si->cursec[CURSEG_HOT_NODE], si->curzone[CURSEG_HOT_NODE], si->dirty_seg[CURSEG_HOT_NODE], si->full_seg[CURSEG_HOT_NODE], si->valid_blks[CURSEG_HOT_NODE]); - seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - File dnode: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_WARM_NODE], si->curseg[CURSEG_WARM_NODE], si->cursec[CURSEG_WARM_NODE], si->curzone[CURSEG_WARM_NODE], si->dirty_seg[CURSEG_WARM_NODE], si->full_seg[CURSEG_WARM_NODE], si->valid_blks[CURSEG_WARM_NODE]); - seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n", + seq_printf(s, " - Indir nodes: %8d %8d %8d %8d %10u %10u %10u\n", + si->blkoff[CURSEG_COLD_NODE], si->curseg[CURSEG_COLD_NODE], si->cursec[CURSEG_COLD_NODE], si->curzone[CURSEG_COLD_NODE], si->dirty_seg[CURSEG_COLD_NODE], si->full_seg[CURSEG_COLD_NODE], si->valid_blks[CURSEG_COLD_NODE]); - seq_printf(s, " - Pinned file: %8d %8d %8d\n", + seq_printf(s, " - Pinned file: %8d %8d %8d %8d\n", + si->blkoff[CURSEG_COLD_DATA_PINNED], si->curseg[CURSEG_COLD_DATA_PINNED], si->cursec[CURSEG_COLD_DATA_PINNED], si->curzone[CURSEG_COLD_DATA_PINNED]); - seq_printf(s, " - ATGC data: %8d %8d %8d\n", + seq_printf(s, " - ATGC data: %8d %8d %8d %8d\n", + si->blkoff[CURSEG_ALL_DATA_ATGC], si->curseg[CURSEG_ALL_DATA_ATGC], si->cursec[CURSEG_ALL_DATA_ATGC], si->curzone[CURSEG_ALL_DATA_ATGC]); @@ -496,8 +575,40 @@ static int stat_show(struct seq_file *s, void *v) si->dirty_count); seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); + if (f2fs_is_multi_device(sbi)) { + seq_puts(s, "Multidevice stats:\n"); + seq_printf(s, " [seg: %8s %8s %8s %8s %8s]", + "inuse", "dirty", "full", "free", "prefree"); + if (__is_large_section(sbi)) + seq_printf(s, " [sec: %8s %8s %8s %8s %8s]\n", + "inuse", "dirty", "full", "free", "prefree"); + else + seq_puts(s, "\n"); + + for (i = 0; i < sbi->s_ndevs; i++) { + seq_printf(s, " #%-2d %8u %8u %8u %8u %8u", i, + si->dev_stats[i].devstats[0][DEVSTAT_INUSE], + si->dev_stats[i].devstats[0][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[0][DEVSTAT_FULL], + si->dev_stats[i].devstats[0][DEVSTAT_FREE], + si->dev_stats[i].devstats[0][DEVSTAT_PREFREE]); + if (!__is_large_section(sbi)) { + seq_puts(s, "\n"); + continue; + } + seq_printf(s, " %8u %8u %8u %8u %8u\n", + si->dev_stats[i].devstats[1][DEVSTAT_INUSE], + si->dev_stats[i].devstats[1][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[1][DEVSTAT_FULL], + si->dev_stats[i].devstats[1][DEVSTAT_FREE], + si->dev_stats[i].devstats[1][DEVSTAT_PREFREE]); + } + seq_puts(s, "\n"); + } seq_printf(s, "CP calls: %d (BG: %d)\n", - si->cp_count, si->bg_cp_count); + si->cp_call_count[TOTAL_CALL], + si->cp_call_count[BACKGROUND]); + seq_printf(s, "CP count: %d\n", si->cp_count); seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); seq_printf(s, " - sit blocks : %u\n", si->meta_count[META_SIT]); @@ -511,12 +622,24 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Total : %4d\n", si->nr_total_ckpt); seq_printf(s, " - Cur time : %4d(ms)\n", si->cur_ckpt_time); seq_printf(s, " - Peak time : %4d(ms)\n", si->peak_ckpt_time); - seq_printf(s, "GC calls: %d (BG: %d)\n", - si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d (%d)\n", - si->data_segs, si->bg_data_segs); - seq_printf(s, " - node segments : %d (%d)\n", - si->node_segs, si->bg_node_segs); + seq_printf(s, "GC calls: %d (gc_thread: %d)\n", + si->gc_call_count[BACKGROUND] + + si->gc_call_count[FOREGROUND], + si->gc_call_count[BACKGROUND]); + if (__is_large_section(sbi)) { + seq_printf(s, " - data sections : %d (BG: %d)\n", + si->gc_secs[DATA][BG_GC] + si->gc_secs[DATA][FG_GC], + si->gc_secs[DATA][BG_GC]); + seq_printf(s, " - node sections : %d (BG: %d)\n", + si->gc_secs[NODE][BG_GC] + si->gc_secs[NODE][FG_GC], + si->gc_secs[NODE][BG_GC]); + } + seq_printf(s, " - data segments : %d (BG: %d)\n", + si->gc_segs[DATA][BG_GC] + si->gc_segs[DATA][FG_GC], + si->gc_segs[DATA][BG_GC]); + seq_printf(s, " - node segments : %d (BG: %d)\n", + si->gc_segs[NODE][BG_GC] + si->gc_segs[NODE][FG_GC], + si->gc_segs[NODE][BG_GC]); seq_puts(s, " - Reclaimed segs :\n"); seq_printf(s, " - Normal : %d\n", sbi->gc_reclaimed_segs[GC_NORMAL]); seq_printf(s, " - Idle CB : %d\n", sbi->gc_reclaimed_segs[GC_IDLE_CB]); @@ -582,9 +705,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); - seq_printf(s, " - datas: %4d in files:%4d\n", + seq_printf(s, " - data: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + seq_printf(s, " - quota data: %4d in quota files:%4d\n", si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); @@ -638,7 +761,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -649,13 +772,22 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; - unsigned long flags; + struct f2fs_dev_stats *dev_stats; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) return -ENOMEM; + dev_stats = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_stats) * + sbi->s_ndevs, GFP_KERNEL); + if (!dev_stats) { + kfree(si); + return -ENOMEM; + } + + si->dev_stats = dev_stats; + si->all_area_segs = le32_to_cpu(raw_super->segment_count); si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); @@ -687,12 +819,14 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); + for (i = 0; i < MAX_CALL_TYPE; i++) + atomic_set(&sbi->cp_call_count[i], 0); atomic_set(&sbi->max_aw_cnt, 0); - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_add_tail(&si->stat_list, &f2fs_stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); return 0; } @@ -700,12 +834,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long flags; - raw_spin_lock_irqsave(&f2fs_stat_lock, flags); + spin_lock(&f2fs_stat_lock); list_del(&si->stat_list); - raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + spin_unlock(&f2fs_stat_lock); + kfree(si->dev_stats); kfree(si); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 8aa29fe2e87b..48f4f98afb01 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> @@ -16,6 +16,21 @@ #include "xattr.h" #include <trace/events/f2fs.h> +static inline bool f2fs_should_fallback_to_linear(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + switch (F2FS_OPTION(sbi).lookup_mode) { + case LOOKUP_PERF: + return false; + case LOOKUP_COMPAT: + return true; + case LOOKUP_AUTO: + return !sb_no_casefold_compat_fallback(sbi->sb); + } + return false; +} + #if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif @@ -42,35 +57,49 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } +#if IS_ENABLED(CONFIG_UNICODE) /* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { -#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = dir->i_sb; + unsigned char *buf; + int len; if (IS_CASEFOLDED(dir) && !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { - fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, - GFP_NOFS, false, F2FS_SB(sb)); - if (!fname->cf_name.name) + buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, + GFP_NOFS, false, F2FS_SB(sb)); + if (!buf) return -ENOMEM; - fname->cf_name.len = utf8_casefold(sb->s_encoding, - fname->usr_fname, - fname->cf_name.name, - F2FS_NAME_LEN); - if ((int)fname->cf_name.len <= 0) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; + + len = utf8_casefold(sb->s_encoding, fname->usr_fname, + buf, F2FS_NAME_LEN); + if (len <= 0) { + kmem_cache_free(f2fs_cf_name_slab, buf); if (sb_has_strict_encoding(sb)) return -EINVAL; /* fall back to treating name as opaque byte sequence */ + return 0; } + fname->cf_name.name = buf; + fname->cf_name.len = len; } -#endif + return 0; } +void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ + unsigned char *buf = (unsigned char *)fname->cf_name.name; + + if (buf) { + kmem_cache_free(f2fs_cf_name_slab, buf); + fname->cf_name.name = NULL; + } +} +#endif /* CONFIG_UNICODE */ + static int __f2fs_setup_filename(const struct inode *dir, const struct fscrypt_name *crypt_name, struct f2fs_filename *fname) @@ -142,12 +171,7 @@ void f2fs_free_filename(struct f2fs_filename *fname) kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif -#if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; - } -#endif + f2fs_free_casefolded_name(fname); } static unsigned long dir_block_index(unsigned int level, @@ -157,77 +181,27 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } static struct f2fs_dir_entry *find_in_block(struct inode *dir, - struct page *dentry_page, + struct folio *dentry_folio, const struct f2fs_filename *fname, - int *max_slots) + int *max_slots, + bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; - dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots); + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } -#if IS_ENABLED(CONFIG_UNICODE) -/* - * Test whether a case-insensitive directory entry matches the filename - * being searched for. - * - * Returns 1 for a match, 0 for no match, and -errno on an error. - */ -static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, - const u8 *de_name, u32 de_name_len) -{ - const struct super_block *sb = dir->i_sb; - const struct unicode_map *um = sb->s_encoding; - struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); - struct qstr entry = QSTR_INIT(de_name, de_name_len); - int res; - - if (IS_ENCRYPTED(dir)) { - const struct fscrypt_str encrypted_name = - FSTR_INIT((u8 *)de_name, de_name_len); - - if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir))) - return -EINVAL; - - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); - if (!decrypted_name.name) - return -ENOMEM; - res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name, - &decrypted_name); - if (res < 0) - goto out; - entry.name = decrypted_name.name; - entry.len = decrypted_name.len; - } - - res = utf8_strncasecmp_folded(um, name, &entry); - /* - * In strict mode, ignore invalid names. In non-strict mode, - * fall back to treating them as opaque byte sequences. - */ - if (res < 0 && !sb_has_strict_encoding(sb)) { - res = name->len == entry.len && - memcmp(name->name, entry.name, name->len) == 0; - } else { - /* utf8_strncasecmp_folded returns 0 on match */ - res = (res == 0); - } -out: - kfree(decrypted_name.name); - return res; -} -#endif /* CONFIG_UNICODE */ - static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, const u8 *de_name, u32 de_name_len) @@ -235,11 +209,11 @@ static inline int f2fs_match_name(const struct inode *dir, struct fscrypt_name f; #if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); + if (fname->cf_name.name) + return generic_ci_match(dir, fname->usr_fname, + &fname->cf_name, + de_name, de_name_len); - return f2fs_match_ci_name(dir, &cf, de_name, de_name_len); - } #endif f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; @@ -250,7 +224,8 @@ static inline int f2fs_match_name(const struct inode *dir, } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots) + const struct f2fs_filename *fname, int *max_slots, + bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -273,7 +248,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash) { + if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); @@ -300,12 +275,12 @@ found: static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio, + bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; - unsigned int bidx, end_block; - struct page *dentry_page; + unsigned int bidx, end_block, bucket_no; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; bool room = false; @@ -314,62 +289,76 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; + +start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(fname->hash) % nbucket); + bucket_no); end_block = bidx + nblock; while (bidx < end_block) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); - if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) { + struct folio *dentry_folio; + dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); + if (IS_ERR(dentry_folio)) { + if (PTR_ERR(dentry_folio) == -ENOENT) { room = true; bidx = next_pgofs; continue; } else { - *res_page = dentry_page; + *res_folio = dentry_folio; break; } } - de = find_in_block(dir, dentry_page, fname, &max_slots); + de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash); if (IS_ERR(de)) { - *res_page = ERR_CAST(de); + *res_folio = ERR_CAST(de); de = NULL; break; } else if (de) { - *res_page = dentry_page; + *res_folio = dentry_folio; break; } if (max_slots >= s) room = true; - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); bidx++; } - if (!de && room && F2FS_I(dir)->chash != fname->hash) { - F2FS_I(dir)->chash = fname->hash; - F2FS_I(dir)->clevel = level; - } + if (de) + return de; - return de; + if (likely(use_hash)) { + if (room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; + F2FS_I(dir)->clevel = level; + } + } else if (++bucket_no < nbucket) { + goto start_find_bucket; + } + return NULL; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; + bool use_hash = true; - *res_page = NULL; + *res_folio = NULL; +#if IS_ENABLED(CONFIG_UNICODE) +start_find_entry: +#endif if (f2fs_has_inline_dentry(dir)) { - de = f2fs_find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash); goto out; } @@ -385,11 +374,19 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, fname, res_page); - if (de || IS_ERR(*res_page)) + de = find_in_level(dir, level, fname, res_folio, use_hash); + if (de || IS_ERR(*res_folio)) break; } + out: +#if IS_ENABLED(CONFIG_UNICODE) + if (f2fs_should_fallback_to_linear(dir) && + IS_CASEFOLDED(dir) && !de && use_hash) { + use_hash = false; + goto start_find_entry; + } +#endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; @@ -403,7 +400,7 @@ out: * Entry is guaranteed to be valid. */ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page) + const struct qstr *child, struct folio **res_folio) { struct f2fs_dir_entry *de = NULL; struct f2fs_filename fname; @@ -412,67 +409,67 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, err = f2fs_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) - *res_page = NULL; + *res_folio = NULL; else - *res_page = ERR_PTR(err); + *res_folio = ERR_PTR(err); return NULL; } - de = __f2fs_find_entry(dir, &fname, res_page); + de = __f2fs_find_entry(dir, &fname, res_folio); f2fs_free_filename(&fname); return de; } -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f) { - return f2fs_find_entry(dir, &dotdot_name, p); + return f2fs_find_entry(dir, &dotdot_name, f); } ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, - struct page **page) + struct folio **folio) { ino_t res = 0; struct f2fs_dir_entry *de; - de = f2fs_find_entry(dir, qstr, page); + de = f2fs_find_entry(dir, qstr, folio); if (de) { res = le32_to_cpu(de->ino); - f2fs_put_page(*page, 0); + f2fs_folio_put(*folio, false); } return res; } void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode) + struct folio *folio, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; - lock_page(page); - f2fs_wait_on_page_writeback(page, type, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, type, true, true); de->ino = cpu_to_le32(inode->i_ino); de->file_type = fs_umode_to_ftype(inode->i_mode); - set_page_dirty(page); + folio_mark_dirty(folio); - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } static void init_dent_inode(struct inode *dir, struct inode *inode, const struct f2fs_filename *fname, - struct page *ipage) + struct folio *ifolio) { struct f2fs_inode *ri; if (!fname) /* tmpfile case? */ return; - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); - /* copy name info. to this inode page */ - ri = F2FS_INODE(ipage); + /* copy name info. to this inode folio */ + ri = F2FS_INODE(ifolio); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { @@ -493,7 +490,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode, file_lost_pino(inode); } } - set_page_dirty(ipage); + folio_mark_dirty(ifolio); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, @@ -510,72 +507,73 @@ void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, } static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) + struct inode *parent, struct folio *folio) { - struct page *dentry_page; + struct folio *dentry_folio; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) - return f2fs_make_empty_inline_dir(inode, parent, page); + return f2fs_make_empty_inline_dir(inode, parent, folio); - dentry_page = f2fs_get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + dentry_folio = f2fs_get_new_data_folio(inode, folio, 0, true); + if (IS_ERR(dentry_folio)) + return PTR_ERR(dentry_folio); - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_do_make_empty_dir(inode, parent, &d); - set_page_dirty(dentry_page); - f2fs_put_page(dentry_page, 1); + folio_mark_dirty(dentry_folio); + f2fs_folio_put(dentry_folio, true); return 0; } -struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct f2fs_filename *fname, struct page *dpage) +struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, + const struct f2fs_filename *fname, struct folio *dfolio) { - struct page *page; + struct folio *folio; int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { - page = f2fs_new_inode_page(inode); - if (IS_ERR(page)) - return page; + folio = f2fs_new_inode_folio(inode); + if (IS_ERR(folio)) + return folio; if (S_ISDIR(inode->i_mode)) { /* in order to handle error case */ - get_page(page); - err = make_empty_dir(inode, dir, page); + folio_get(folio); + err = make_empty_dir(inode, dir, folio); if (err) { - lock_page(page); + folio_lock(folio); goto put_error; } - put_page(page); + folio_put(folio); } - err = f2fs_init_acl(inode, dir, page, dpage); + err = f2fs_init_acl(inode, dir, folio, dfolio); if (err) goto put_error; err = f2fs_init_security(inode, dir, - fname ? fname->usr_fname : NULL, page); + fname ? fname->usr_fname : NULL, + folio); if (err) goto put_error; if (IS_ENCRYPTED(inode)) { - err = fscrypt_set_context(inode, page); + err = fscrypt_set_context(inode, folio); if (err) goto put_error; } } else { - page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino); - if (IS_ERR(page)) - return page; + folio = f2fs_get_inode_folio(F2FS_I_SB(dir), inode->i_ino); + if (IS_ERR(folio)) + return folio; } - init_dent_inode(dir, inode, fname, page); + init_dent_inode(dir, inode, fname, folio); /* * This file should be checkpointed during fsync. @@ -592,12 +590,12 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } - return page; + return folio; put_error: clear_nlink(inode); - f2fs_update_inode(inode, page); - f2fs_put_page(page, 1); + f2fs_update_inode(inode, folio); + f2fs_folio_put(folio, true); return ERR_PTR(err); } @@ -609,7 +607,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, f2fs_i_links_write(dir, true); clear_inode_flag(inode, FI_NEW_INODE); } - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); if (F2FS_I(dir)->i_current_depth != current_depth) @@ -639,14 +637,14 @@ next: goto next; } -bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, +bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, const struct f2fs_filename *fname) { struct f2fs_dentry_ptr d; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(fname->disk_name.len); - make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); + make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ifolio)); bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); @@ -683,10 +681,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, unsigned int current_depth; unsigned long bidx, block; unsigned int nbucket, nblock; - struct page *dentry_page = NULL; + struct folio *dentry_folio = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; - struct page *page = NULL; + struct folio *folio = NULL; int slots, err = 0; level = 0; @@ -716,30 +714,30 @@ start: (le32_to_cpu(fname->hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + dentry_folio = f2fs_get_new_data_folio(dir, NULL, block, true); + if (IS_ERR(dentry_folio)) + return PTR_ERR(dentry_folio); - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; - f2fs_put_page(dentry_page, 1); + f2fs_folio_put(dentry_folio, true); } /* Move to next level to find the empty slot for new dentry */ ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); + f2fs_folio_wait_writeback(dentry_folio, DATA, true, true); if (inode) { f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, fname, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } } @@ -748,16 +746,16 @@ add_dentry: f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, bit_pos); - set_page_dirty(dentry_page); + folio_mark_dirty(dentry_folio); if (inode) { f2fs_i_pino_write(inode, dir->i_ino); /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) - f2fs_update_inode(inode, page); + f2fs_update_inode(inode, folio); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } f2fs_update_parent_metadata(dir, inode, current_depth); @@ -765,7 +763,7 @@ fail: if (inode) f2fs_up_write(&F2FS_I(inode)->i_sem); - f2fs_put_page(dentry_page, 1); + f2fs_folio_put(dentry_folio, true); return err; } @@ -799,7 +797,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_filename fname; - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_dir_entry *de = NULL; int err; @@ -815,14 +813,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, * consistency more. */ if (current != F2FS_I(dir)->task) { - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); F2FS_I(dir)->task = NULL; } if (de) { - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); err = -EEXIST; - } else if (IS_ERR(page)) { - err = PTR_ERR(page); + } else if (IS_ERR(folio)) { + err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } @@ -830,18 +828,19 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, return err; } -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname) { - struct page *page; + struct folio *folio; int err = 0; f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -877,12 +876,13 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode) { - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + pgoff_t index = folio->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -891,12 +891,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) - return f2fs_delete_inline_entry(dentry, page, dir, inode); + return f2fs_delete_inline_entry(dentry, folio, dir, inode); - lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, DATA, true, true); - dentry_blk = page_address(page); + dentry_blk = folio_address(folio); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); @@ -905,21 +905,21 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - set_page_dirty(page); + folio_mark_dirty(folio); if (bit_pos == NR_DENTRY_IN_BLOCK && - !f2fs_truncate_hole(dir, page->index, page->index + 1)) { - f2fs_clear_page_cache_dirty_tag(page); - clear_page_dirty_for_io(page); - ClearPageUptodate(page); - clear_page_private_all(page); + !f2fs_truncate_hole(dir, index, index + 1)) { + f2fs_clear_page_cache_dirty_tag(folio); + folio_clear_dirty_for_io(folio); + folio_clear_uptodate(folio); + folio_detach_private(folio); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); if (inode) @@ -929,7 +929,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_dir(struct inode *dir) { unsigned long bidx = 0; - struct page *dentry_page; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); @@ -939,10 +938,11 @@ bool f2fs_empty_dir(struct inode *dir) while (bidx < nblock) { pgoff_t next_pgofs; + struct folio *dentry_folio; - dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); - if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) { + dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); + if (IS_ERR(dentry_folio)) { + if (PTR_ERR(dentry_folio) == -ENOENT) { bidx = next_pgofs; continue; } else { @@ -950,7 +950,7 @@ bool f2fs_empty_dir(struct inode *dir) } } - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); if (bidx == 0) bit_pos = 2; else @@ -959,7 +959,7 @@ bool f2fs_empty_dir(struct inode *dir) NR_DENTRY_IN_BLOCK, bit_pos); - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; @@ -995,9 +995,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de = &d->dentry[bit_pos]; if (de->name_len == 0) { if (found_valid_dirent || !bit_pos) { - printk_ratelimited( - "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, sbi->sb->s_id, + f2fs_warn_ratelimited(sbi, + "invalid namelen(0), ino:%u, run fsck to fix.", le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -1059,7 +1058,6 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); struct f2fs_dentry_block *dentry_blk = NULL; - struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); @@ -1083,6 +1081,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) { + struct folio *dentry_folio; pgoff_t next_pgofs; /* allow readdir() to be interrupted */ @@ -1097,9 +1096,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_find_data_page(inode, n, &next_pgofs); - if (IS_ERR(dentry_page)) { - err = PTR_ERR(dentry_page); + dentry_folio = f2fs_find_data_folio(inode, n, &next_pgofs); + if (IS_ERR(dentry_folio)) { + err = PTR_ERR(dentry_folio); if (err == -ENOENT) { err = 0; n = next_pgofs; @@ -1109,18 +1108,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } } - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); - if (err) { - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); + if (err) break; - } - - f2fs_put_page(dentry_page, 0); n++; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e2d49140c07..0ed84cc065a7 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,37 +19,56 @@ #include "node.h" #include <trace/events/f2fs.h> -bool sanity_check_extent_cache(struct inode *inode) +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct extent_tree *et = fi->extent_tree[EX_READ]; - struct extent_info *ei; - - if (!et) - return true; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; + struct extent_info ei; + int devi; - ei = &et->largest; - if (!ei->len) - return true; + get_read_extent_info(&ei, i_ext); - /* Let's drop, if checkpoint got corrupted. */ - if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) { - ei->len = 0; - et->largest_updated = true; + if (!ei.len) return true; - } - if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || - !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, DATA_GENERIC_ENHANCE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + ei.blk, ei.fofs, ei.len); return false; } - return true; + + if (!IS_DEVICE_ALIASING(inode)) + return true; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (FDEV(devi).start_blk != ei.blk || + FDEV(devi).end_blk != ei.blk + ei.len - 1) + continue; + + if (devi == 0) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) is an alias of meta device", + __func__, inode->i_ino); + return false; + } + + if (bdev_is_zoned(FDEV(devi).bdev)) { + f2fs_warn(sbi, + "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] maps to zoned block device", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; + } + return true; + } + + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] is inconsistent w/ any devices", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; } static void __set_extent_info(struct extent_info *ei, @@ -74,45 +93,22 @@ static void __set_extent_info(struct extent_info *ei, } } -static bool __may_read_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi)) - return false; - return S_ISREG(inode->i_mode); -} - -static bool __may_age_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, AGE_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return false; - if (file_is_cold(inode)) - return false; - - return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); -} - static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) - return __may_read_extent_tree(inode); - else if (type == EX_BLOCK_AGE) - return __may_age_extent_tree(inode); + return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && + S_ISREG(inode->i_mode); + if (type == EX_BLOCK_AGE) + return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); return false; } static bool __may_extent_tree(struct inode *inode, enum extent_type type) { + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) + return true; + /* * for recovered files during mount do not create extents * if shrinker is not registered. @@ -120,7 +116,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - return __init_may_extent_tree(inode, type); + if (!__init_may_extent_tree(inode, type)) + return false; + + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(F2FS_I_SB(inode))) + return false; + } else if (type == EX_BLOCK_AGE) { + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + } + return true; } static void __try_update_largest_extent(struct extent_tree *et, @@ -368,62 +379,66 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode, } static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et) + struct extent_tree *et, unsigned int nr_shrink) { struct rb_node *node, *next; struct extent_node *en; - unsigned int count = atomic_read(&et->node_cnt); + unsigned int count; node = rb_first_cached(&et->root); - while (node) { + + for (count = 0; node && count < nr_shrink; count++) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); __release_extent_node(sbi, et, en); node = next; } - return count - atomic_read(&et->node_cnt); + return count; } static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; } } -void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; - struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext; struct extent_tree *et; struct extent_node *en; - struct extent_info ei; + struct extent_info ei = {0}; if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (i_ext && i_ext->len) { - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + if (i_ext->len) { + f2fs_folio_wait_writeback(ifolio, NODE, true, true); i_ext->len = 0; - set_page_dirty(ipage); + folio_mark_dirty(ifolio); } - goto out; + set_inode_flag(inode, FI_NO_EXTENT); + return; } et = __grab_extent_tree(inode, EX_READ); - if (!i_ext || !i_ext->len) - goto out; - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); - if (atomic_read(&et->node_cnt)) - goto unlock_out; + if (atomic_read(&et->node_cnt) || !ei.len) + goto skip; + + if (IS_DEVICE_ALIASING(inode)) { + et->largest = ei; + goto skip; + } en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -435,11 +450,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) list_add_tail(&en->list, &eti->extent_list); spin_unlock(&eti->extent_lock); } -unlock_out: +skip: + /* Let's drop, if checkpoint got corrupted. */ + if (f2fs_cp_error(sbi)) { + et->largest.len = 0; + et->largest_updated = true; + } write_unlock(&et->lock); -out: - if (!F2FS_I(inode)->extent_tree[EX_READ]) - set_inode_flag(inode, FI_NO_EXTENT); } void f2fs_init_age_extent_tree(struct inode *inode) @@ -478,13 +495,18 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); goto out; } + if (IS_DEVICE_ALIASING(inode)) { + ret = false; + goto out; + } + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; @@ -582,7 +604,13 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, p = &(*p)->rb_right; leftmost = false; } else { + f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, " + "extent node in rb tree [%u, %u, %u], age [%llu, %llu], " + "extent node to insert [%u, %u, %u], age [%llu, %llu]", + __func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age, + en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks); f2fs_bug_on(sbi, 1); + return NULL; } } @@ -601,6 +629,30 @@ do_insert: return en; } +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; + unsigned int nr_shrink = type == EX_READ ? + READ_EXTENT_CACHE_SHRINK_NUMBER : + AGE_EXTENT_CACHE_SHRINK_NUMBER; + unsigned int node_cnt = 0; + + if (!et || !atomic_read(&et->node_cnt)) + return 0; + + while (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, nr_shrink); + write_unlock(&et->lock); + } + + f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + return node_cnt; +} + static void __update_extent_tree_range(struct inode *inode, struct extent_info *tei, enum extent_type type) { @@ -618,6 +670,15 @@ static void __update_extent_tree_range(struct inode *inode, if (!et) return; + if (unlikely(len == 0)) { + f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, " + "extent [%u, %u, %u], age [%llu, %llu]", + __func__, type, tei->fofs, tei->blk, tei->len, + tei->age, tei->last_blocks); + f2fs_bug_on(sbi, 1); + return; + } + if (type == EX_READ) trace_f2fs_update_read_extent_tree_range(inode, fofs, len, tei->blk, 0); @@ -671,7 +732,9 @@ static void __update_extent_tree_range(struct inode *inode, } if (end < org_end && (type != EX_READ || - org_end - end >= F2FS_MIN_EXTENT_LEN)) { + (org_end - end >= F2FS_MIN_EXTENT_LEN && + atomic_read(&et->node_cnt) < + sbi->max_read_extent_count))) { if (parts) { __set_extent_info(&ei, end, org_end - end, @@ -739,16 +802,13 @@ static void __update_extent_tree_range(struct inode *inode, } } - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - __free_extent_tree(sbi, et); - if (et->largest_updated) { et->largest_updated = false; updated = true; } goto out_read_extent_cache; update_age_extent_cache: - if (!tei->last_blocks) + if (tei->last_blocks == F2FS_EXTENT_AGE_INVALID) goto out_read_extent_cache; __set_extent_info(&ei, fofs, len, 0, false, @@ -759,6 +819,9 @@ update_age_extent_cache: out_read_extent_cache: write_unlock(&et->lock); + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + __destroy_extent_node(inode, EX_READ); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -849,7 +912,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ - cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; + cur_age = (ULLONG_MAX - 1) - tei.last_blocks + cur_blocks; if (tei.age) ei->age = __calculate_block_age(sbi, cur_age, tei.age); @@ -867,10 +930,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, goto out; if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - f2fs_bug_on(sbi, 1); + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) return -EINVAL; - } out: /* * init block age with zero, this can happen when the block age extent @@ -888,7 +949,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ if (!__may_extent_tree(dn->inode, type)) return; - ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + dn->ofs_in_node; ei.len = 1; @@ -923,10 +984,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et); + node_cnt += __free_extent_tree(sbi, et, + nr_shrink - node_cnt - tree_cnt); write_unlock(&et->lock); } - f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + if (atomic_read(&et->node_cnt)) + goto unlock_out; + list_del_init(&et->list); radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); @@ -1049,6 +1114,7 @@ void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, struct extent_info ei = { .fofs = fofs, .len = len, + .last_blocks = F2FS_EXTENT_AGE_INVALID, }; if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) @@ -1065,23 +1131,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); } -static unsigned int __destroy_extent_node(struct inode *inode, - enum extent_type type) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; - unsigned int node_cnt = 0; - - if (!et || !atomic_read(&et->node_cnt)) - return 0; - - write_lock(&et->lock); - node_cnt = __free_extent_tree(sbi, et); - write_unlock(&et->lock); - - return node_cnt; -} - void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); @@ -1090,7 +1139,6 @@ void f2fs_destroy_extent_node(struct inode *inode) static void __drop_extent_tree(struct inode *inode, enum extent_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; @@ -1098,7 +1146,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) return; write_lock(&et->lock); - __free_extent_tree(sbi, et); if (type == EX_READ) { set_inode_flag(inode, FI_NO_EXTENT); if (et->largest.len) { @@ -1107,6 +1154,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) } } write_unlock(&et->lock); + + __destroy_extent_node(inode, type); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -1180,6 +1230,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; sbi->last_age_weight = LAST_AGE_WEIGHT; + sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 613132339d72..20edbb99b814 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -11,7 +11,6 @@ #include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> -#include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/magic.h> @@ -24,7 +23,7 @@ #include <linux/blkdev.h> #include <linux/quotaops.h> #include <linux/part_stat.h> -#include <crypto/hash.h> +#include <linux/rw_hint.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> @@ -60,59 +59,89 @@ enum { FAULT_SLAB_ALLOC, FAULT_DQUOT_INIT, FAULT_LOCK_OP, - FAULT_BLKADDR, + FAULT_BLKADDR_VALIDITY, + FAULT_BLKADDR_CONSISTENCE, + FAULT_NO_SEGMENT, + FAULT_INCONSISTENT_FOOTER, + FAULT_TIMEOUT, + FAULT_VMALLOC, FAULT_MAX, }; -#ifdef CONFIG_F2FS_FAULT_INJECTION -#define F2FS_ALL_FAULT_TYPE (GENMASK(FAULT_MAX - 1, 0)) +/* indicate which option to update */ +enum fault_option { + FAULT_RATE = 1, /* only update fault rate */ + FAULT_TYPE = 2, /* only update fault type */ + FAULT_ALL = 4, /* reset all fault injection options/stats */ +}; +#ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info { atomic_t inject_ops; - unsigned int inject_rate; + int inject_rate; unsigned int inject_type; + /* Used to account total count of injection for each type */ + unsigned int inject_count[FAULT_MAX]; }; extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* * For mount options */ -#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000001 -#define F2FS_MOUNT_DISCARD 0x00000002 -#define F2FS_MOUNT_NOHEAP 0x00000004 -#define F2FS_MOUNT_XATTR_USER 0x00000008 -#define F2FS_MOUNT_POSIX_ACL 0x00000010 -#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000020 -#define F2FS_MOUNT_INLINE_XATTR 0x00000040 -#define F2FS_MOUNT_INLINE_DATA 0x00000080 -#define F2FS_MOUNT_INLINE_DENTRY 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 -#define F2FS_MOUNT_FASTBOOT 0x00000800 -#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00001000 -#define F2FS_MOUNT_DATA_FLUSH 0x00002000 -#define F2FS_MOUNT_FAULT_INJECTION 0x00004000 -#define F2FS_MOUNT_USRQUOTA 0x00008000 -#define F2FS_MOUNT_GRPQUOTA 0x00010000 -#define F2FS_MOUNT_PRJQUOTA 0x00020000 -#define F2FS_MOUNT_QUOTA 0x00040000 -#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00080000 -#define F2FS_MOUNT_RESERVE_ROOT 0x00100000 -#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x00200000 -#define F2FS_MOUNT_NORECOVERY 0x00400000 -#define F2FS_MOUNT_ATGC 0x00800000 -#define F2FS_MOUNT_MERGE_CHECKPOINT 0x01000000 -#define F2FS_MOUNT_GC_MERGE 0x02000000 -#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000 -#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000 +enum f2fs_mount_opt { + F2FS_MOUNT_DISABLE_ROLL_FORWARD, + F2FS_MOUNT_DISCARD, + F2FS_MOUNT_NOHEAP, + F2FS_MOUNT_XATTR_USER, + F2FS_MOUNT_POSIX_ACL, + F2FS_MOUNT_DISABLE_EXT_IDENTIFY, + F2FS_MOUNT_INLINE_XATTR, + F2FS_MOUNT_INLINE_DATA, + F2FS_MOUNT_INLINE_DENTRY, + F2FS_MOUNT_FLUSH_MERGE, + F2FS_MOUNT_NOBARRIER, + F2FS_MOUNT_FASTBOOT, + F2FS_MOUNT_READ_EXTENT_CACHE, + F2FS_MOUNT_DATA_FLUSH, + F2FS_MOUNT_FAULT_INJECTION, + F2FS_MOUNT_USRQUOTA, + F2FS_MOUNT_GRPQUOTA, + F2FS_MOUNT_PRJQUOTA, + F2FS_MOUNT_QUOTA, + F2FS_MOUNT_INLINE_XATTR_SIZE, + F2FS_MOUNT_RESERVE_ROOT, + F2FS_MOUNT_DISABLE_CHECKPOINT, + F2FS_MOUNT_NORECOVERY, + F2FS_MOUNT_ATGC, + F2FS_MOUNT_MERGE_CHECKPOINT, + F2FS_MOUNT_GC_MERGE, + F2FS_MOUNT_COMPRESS_CACHE, + F2FS_MOUNT_AGE_EXTENT_CACHE, + F2FS_MOUNT_NAT_BITS, + F2FS_MOUNT_INLINECRYPT, + /* + * Some f2fs environments expect to be able to pass the "lazytime" option + * string rather than using the MS_LAZYTIME flag, so this must remain. + */ + F2FS_MOUNT_LAZYTIME, + F2FS_MOUNT_RESERVE_NODE, +}; #define F2FS_OPTION(sbi) ((sbi)->mount_opt) -#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) +#define clear_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt &= ~BIT(F2FS_MOUNT_##option)) +#define set_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt |= BIT(F2FS_MOUNT_##option)) +#define test_opt(sbi, option) \ + (F2FS_OPTION(sbi).opt & BIT(F2FS_MOUNT_##option)) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -126,6 +155,24 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + +enum bggc_io_aware_policy { + AWARE_ALL_IO, /* skip background GC if there is any kind of pending IO */ + AWARE_READ_IO, /* skip background GC if there is pending read IO */ + AWARE_NONE, /* don't aware IO for background GC */ +}; + +enum device_allocation_policy { + ALLOCATE_FORWARD_NOHINT, + ALLOCATE_FORWARD_WITHIN_HINT, + ALLOCATE_FORWARD_FROM_HINT, +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -141,9 +188,9 @@ struct f2fs_rwsem { }; struct f2fs_mount_info { - unsigned int opt; - int write_io_size_bits; /* Write IO size bits */ + unsigned long long opt; block_t root_reserved_blocks; /* root reserved blocks */ + block_t root_reserved_nodes; /* root reserved nodes */ kuid_t s_resuid; /* reserved blocks for uid */ kgid_t s_resgid; /* reserved blocks for gid */ int active_logs; /* # of active logs */ @@ -184,6 +231,7 @@ struct f2fs_mount_info { int compress_mode; /* compression mode */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ + unsigned int lookup_mode; }; #define F2FS_FEATURE_ENCRYPT 0x00000001 @@ -201,6 +249,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_CASEFOLD 0x00001000 #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 +#define F2FS_FEATURE_PACKED_SSA 0x00010000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -237,14 +287,42 @@ enum { #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_INTERVAL 5 /* 5 secs */ +#define DEF_ENABLE_INTERVAL 5 /* 5 secs */ #define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */ #define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */ +enum cp_time { + CP_TIME_START, /* begin */ + CP_TIME_LOCK, /* after cp_global_sem */ + CP_TIME_OP_LOCK, /* after block_operation */ + CP_TIME_FLUSH_META, /* after flush sit/nat */ + CP_TIME_SYNC_META, /* after sync_meta_pages */ + CP_TIME_SYNC_CP_META, /* after sync cp meta pages */ + CP_TIME_WAIT_DIRTY_META,/* after wait on dirty meta */ + CP_TIME_WAIT_CP_DATA, /* after wait on cp data */ + CP_TIME_FLUSH_DEVICE, /* after flush device cache */ + CP_TIME_WAIT_LAST_CP, /* after wait on last cp pack */ + CP_TIME_END, /* after unblock_operation */ + CP_TIME_MAX, +}; + +/* time cost stats of checkpoint */ +struct cp_stats { + ktime_t times[CP_TIME_MAX]; +}; + struct cp_control { int reason; __u64 trim_start; __u64 trim_end; __u64 trim_minlen; + struct cp_stats stats; +}; + +enum f2fs_cp_phase { + CP_PHASE_START_BLOCK_OPS, + CP_PHASE_FINISH_BLOCK_OPS, + CP_PHASE_FINISH_CHECKPOINT, }; /* @@ -278,6 +356,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -296,7 +375,7 @@ struct inode_entry { struct fsync_node_entry { struct list_head list; /* list head */ - struct page *page; /* warm node page pointer */ + struct folio *folio; /* warm node folio pointer */ unsigned int seq_id; /* sequence id */ }; @@ -304,7 +383,10 @@ struct ckpt_req { struct completion wait; /* completion for checkpoint done */ struct llist_node llnode; /* llist_node to be linked in wait queue */ int ret; /* return code of checkpoint */ - ktime_t queue_time; /* request queued time */ + union { + ktime_t queue_time; /* request queued time */ + ktime_t delta_time; /* time in queue */ + }; }; struct ckpt_req_control { @@ -320,6 +402,9 @@ struct ckpt_req_control { unsigned int peak_time; /* peak wait time in msec until now */ }; +/* a time threshold that checkpoint was blocked for, unit: ms */ +#define CP_LONG_LATENCY_THRESHOLD 5000 + /* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ @@ -333,6 +418,8 @@ struct discard_entry { #define DEFAULT_DISCARD_GRANULARITY 16 /* default maximum discard granularity of ordered discard, unit: block count */ #define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY 16 +/* default interval of periodical discard submission */ +#define DEFAULT_DISCARD_INTERVAL (msecs_to_jiffies(20)) /* max discard pend list number */ #define MAX_PLIST_NUM 512 @@ -356,7 +443,7 @@ struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ struct discard_info di; /* discard info */ struct list_head list; /* command list */ - struct completion wait; /* compleation */ + struct completion wait; /* completion */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ @@ -374,6 +461,12 @@ enum { MAX_DPOLICY, }; +enum { + DPOLICY_IO_AWARE_DISABLE, /* force to not be aware of IO */ + DPOLICY_IO_AWARE_ENABLE, /* force to be aware of IO */ + DPOLICY_IO_AWARE_MAX, +}; + struct discard_policy { int type; /* type of discard */ unsigned int min_interval; /* used for candidates exist */ @@ -406,6 +499,7 @@ struct discard_cmd_control { unsigned int discard_urgent_util; /* utilization which issue discard proactively */ unsigned int discard_granularity; /* discard granularity */ unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */ + unsigned int discard_io_aware; /* io_aware policy */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ @@ -517,7 +611,7 @@ struct f2fs_filename { * internal operation where usr_fname is also NULL. In all these cases * we fall back to treating the name as an opaque byte sequence. */ - struct fscrypt_str cf_name; + struct qstr cf_name; #endif }; @@ -575,8 +669,11 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO or flush count */ -/* congestion wait timeout value, default: 20ms */ -#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) +/* IO/non-IO congestion wait timeout value, default: 1ms */ +#define DEFAULT_SCHEDULE_TIMEOUT (msecs_to_jiffies(1)) + +/* timeout value injected, default: 1000ms */ +#define DEFAULT_FAULT_TIMEOUT (msecs_to_jiffies(1000)) /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 @@ -614,6 +711,9 @@ enum { #define DEF_HOT_DATA_AGE_THRESHOLD 262144 #define DEF_WARM_DATA_AGE_THRESHOLD 2621440 +/* default max read extent count per inode */ +#define DEF_MAX_READ_EXTENT_COUNT 10240 + /* extent cache type */ enum extent_type { EX_READ, @@ -621,6 +721,12 @@ enum extent_type { NR_EXTENT_CACHES, }; +/* + * Reserved value to mark invalid age extents, hence valid block range + * from 0 to ULLONG_MAX-1 + */ +#define F2FS_EXTENT_AGE_INVALID ULLONG_MAX + struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ @@ -689,6 +795,7 @@ struct f2fs_map_blocks { block_t m_lblk; unsigned int m_len; unsigned int m_flags; + unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; @@ -751,11 +858,6 @@ enum { #define DEF_DIR_LEVEL 0 -enum { - GC_FAILURE_PIN, - MAX_GC_FAILURE -}; - /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -774,10 +876,7 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -795,7 +894,10 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ + FI_OPENED_FILE, /* indicate file has been opened */ + FI_DONATE_FINISHED, /* indicate page donation of file has been finished */ FI_MAX, /* max flag, never be used */ }; @@ -804,14 +906,16 @@ struct f2fs_inode_info { unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* only for directory depth */ - /* for gc failure statistic */ - unsigned int i_gc_failures[MAX_GC_FAILURE]; + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned short i_gc_failures; /* for gc failure statistic */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ + unsigned int ioprio_hint; /* hint for IO priority */ struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -824,17 +928,27 @@ struct f2fs_inode_info { spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; + struct dquot __rcu *i_dquot[MAXQUOTAS]; /* quota space reservation, managed internally by quota code */ qsize_t i_reserved_quota; #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + + /* linked in global inode list for cache donation */ + struct list_head gdonate_list; + pgoff_t donate_start, donate_end; /* inclusive */ + atomic_t open_count; /* # of open files */ + struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -853,9 +967,16 @@ struct f2fs_inode_info { unsigned char i_compress_level; /* compress level (lz4hc,zstd) */ unsigned char i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ + atomic_t writeback; /* count # of writeback thread */ unsigned int atomic_write_cnt; loff_t original_i_size; /* original i_size before atomic write */ +#ifdef CONFIG_FS_ENCRYPTION + struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */ +#endif +#ifdef CONFIG_FS_VERITY + struct fsverity_info *i_verity_info; /* filesystem verity info */ +#endif }; static inline void get_read_extent_info(struct extent_info *ext, @@ -958,11 +1079,11 @@ struct f2fs_nm_info { */ struct dnode_of_data { struct inode *inode; /* vfs inode pointer */ - struct page *inode_page; /* its inode page, NULL is possible */ - struct page *node_page; /* cached direct node page */ + struct folio *inode_folio; /* its inode folio, NULL is possible */ + struct folio *node_folio; /* cached direct node folio */ nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ - bool inode_page_locked; /* inode page is locked or not */ + bool inode_folio_locked; /* inode folio is locked or not */ bool node_changed; /* is node block changed */ char cur_level; /* level of hole node page */ char max_level; /* level of current page located */ @@ -970,12 +1091,12 @@ struct dnode_of_data { }; static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, - struct page *ipage, struct page *npage, nid_t nid) + struct folio *ifolio, struct folio *nfolio, nid_t nid) { memset(dn, 0, sizeof(*dn)); dn->inode = inode; - dn->inode_page = ipage; - dn->node_page = npage; + dn->inode_folio = ifolio; + dn->node_folio = nfolio; dn->nid = nid; } @@ -999,7 +1120,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, #define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) #define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE) -enum { +enum log_type { CURSEG_HOT_DATA = 0, /* directory entry blocks */ CURSEG_WARM_DATA, /* data blocks */ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ @@ -1044,7 +1165,6 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ - unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -1075,7 +1195,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(folio, f) \ + (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -1105,6 +1226,7 @@ enum count_type { * ... Only can be used with META. */ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) +#define PAGE_TYPE_ON_MAIN(type) ((type) == DATA || (type) == NODE) enum page_type { DATA = 0, NODE = 1, /* should not change this */ @@ -1140,6 +1262,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { @@ -1189,7 +1312,10 @@ struct f2fs_io_info { blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ - struct page *page; /* page to be written */ + union { + struct page *page; /* page to be written */ + struct folio *folio; + }; struct page *encrypted_page; /* encrypted page */ struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ @@ -1199,9 +1325,8 @@ struct f2fs_io_info { unsigned int submitted:1; /* indicate IO submission */ unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ - unsigned int retry:1; /* need to reallocate block address */ unsigned int encrypted:1; /* indicate file is encrypted */ - unsigned int post_read:1; /* require post read */ + unsigned int meta_gc:1; /* require meta inode GC */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -1234,8 +1359,9 @@ struct f2fs_bio_info { #define FDEV(i) (sbi->devs[i]) #define RDEV(i) (raw_super->devs[i]) struct f2fs_dev_info { + struct file *bdev_file; struct block_device *bdev; - char path[MAX_PATH_LEN]; + char path[MAX_PATH_LEN + 1]; unsigned int total_segments; block_t start_blk; block_t end_blk; @@ -1249,6 +1375,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + DONATE_INODE, /* for all inode to donate pages */ NR_INODE_TYPE, }; @@ -1278,6 +1405,7 @@ struct f2fs_gc_control { bool no_bg_gc; /* check the space and stop bg_gc */ bool should_migrate_blocks; /* should migrate blocks */ bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ unsigned int nr_free_secs; /* # of free sections to do GC */ }; @@ -1311,6 +1439,7 @@ enum { DISCARD_TIME, GC_TIME, DISABLE_TIME, + ENABLE_TIME, UMOUNT_DISCARD_TIMEOUT, MAX_TIME, }; @@ -1374,7 +1503,7 @@ enum { enum { MEMORY_MODE_NORMAL, /* memory mode for normal devices */ - MEMORY_MODE_LOW, /* memory mode for low memry devices */ + MEMORY_MODE_LOW, /* memory mode for low memory devices */ }; enum errors_option { @@ -1383,6 +1512,19 @@ enum errors_option { MOUNT_ERRORS_PANIC, /* panic on errors */ }; +enum { + BACKGROUND, + FOREGROUND, + MAX_CALL_TYPE, + TOTAL_CALL = FOREGROUND, +}; + +enum f2fs_lookup_mode { + LOOKUP_PERF, + LOOKUP_COMPAT, + LOOKUP_AUTO, +}; + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -1393,10 +1535,10 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * Layout A: lowest bit should be 1 * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | * bit 0 PAGE_PRIVATE_NOT_POINTER - * bit 1 PAGE_PRIVATE_DUMMY_WRITE - * bit 2 PAGE_PRIVATE_ONGOING_MIGRATION - * bit 3 PAGE_PRIVATE_INLINE_INODE - * bit 4 PAGE_PRIVATE_REF_RESOURCE + * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION + * bit 2 PAGE_PRIVATE_INLINE_INODE + * bit 3 PAGE_PRIVATE_REF_RESOURCE + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE * bit 5- f2fs private data * * Layout B: lowest bit should be 0 @@ -1404,10 +1546,10 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); */ enum { PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */ - PAGE_PRIVATE_DUMMY_WRITE, /* data page for padding aligned IO */ PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -1431,7 +1573,7 @@ enum compress_flag { #define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* compressed data chksum */ + __le32 chksum; /* compressed data checksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; @@ -1476,6 +1618,7 @@ struct compress_io_ctx { struct decompress_io_ctx { u32 magic; /* magic number to indicate page is compressed */ struct inode *inode; /* inode the context belong to */ + struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ pgoff_t cluster_idx; /* cluster index number */ unsigned int cluster_size; /* page count in cluster */ unsigned int log_cluster_size; /* log of cluster size */ @@ -1516,6 +1659,7 @@ struct decompress_io_ctx { bool failed; /* IO error occurred before decompression? */ bool need_verity; /* need fs-verity verification after decompression? */ + unsigned char compress_algorithm; /* backup algorithm type */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ struct work_struct verity_work; /* work to verify the decompressed pages */ @@ -1538,6 +1682,10 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ + unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ + unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ @@ -1551,7 +1699,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ struct f2fs_rwsem io_order_lock; - mempool_t *write_io_dummy; /* Dummy pages */ pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ @@ -1568,6 +1715,8 @@ struct f2fs_sb_info { unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ struct ckpt_req_control cprc_info; /* for checkpoint request control */ + struct cp_stats cp_stats; /* for time stat of checkpoint */ + struct f2fs_rwsem cp_enable_rwsem; /* block cache/dio write */ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -1587,12 +1736,16 @@ struct f2fs_sb_info { /* for extent tree cache */ struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; atomic64_t allocated_data_blocks; /* for block age extent_cache */ + unsigned int max_read_extent_count; /* max read extent count per inode */ /* The threshold used for hot and warm data seperation*/ unsigned int hot_data_age_threshold; unsigned int warm_data_age_threshold; unsigned int last_age_weight; + /* control donate caches */ + unsigned int donate_files; + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ @@ -1602,7 +1755,6 @@ struct f2fs_sb_info { unsigned int meta_ino_num; /* meta inode number*/ unsigned int log_blocks_per_seg; /* log2 blocks per segment */ unsigned int blocks_per_seg; /* blocks per segment */ - unsigned int unusable_blocks_per_sec; /* unusable blocks per section */ unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ @@ -1624,6 +1776,7 @@ struct f2fs_sb_info { unsigned int nquota_files; /* # of quota sysfile */ struct f2fs_rwsem quota_sem; /* blocking cp for flags */ + struct task_struct *umount_lock_holder; /* s_umount lock holder */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1657,14 +1810,19 @@ struct f2fs_sb_info { /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ + /* free sections reserved for pinned file */ + unsigned int reserved_pin_section; + /* threshold for gc trials on pinned files */ - u64 gc_pin_file_threshold; + unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; /* migration granularity of garbage collection, unit: segment */ unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. @@ -1695,6 +1853,7 @@ struct f2fs_sb_info { unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ + atomic_t cp_call_count[MAX_CALL_TYPE]; /* # of cp call */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -1723,14 +1882,15 @@ struct f2fs_sb_info { unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ bool aligned_blksize; /* all devices has the same logical blksize */ + unsigned int first_seq_zone_segno; /* first segno in sequential zone */ + unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */ + unsigned int allocate_section_hint; /* the boundary position between devices */ + unsigned int allocate_section_policy; /* determine the section writing priority */ /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; @@ -1746,9 +1906,6 @@ struct f2fs_sb_info { spinlock_t error_lock; /* protect errors/stop_reason array */ bool error_dirty; /* errors of sb is dirty */ - struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ - unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ - /* For reclaimed segs statistics per each GC mode */ unsigned int gc_segment_mode; /* GC state for reclaimed segments */ unsigned int gc_reclaimed_segs[MAX_GC_MODE]; /* Reclaimed segs for each mode */ @@ -1764,6 +1921,9 @@ struct f2fs_sb_info { u64 committed_atomic_block; u64 revoked_atomic_block; + /* carve out reserved_blocks from total blocks */ + bool carve_out; + #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ @@ -1796,6 +1956,37 @@ struct f2fs_sb_info { #endif }; +/* Definitions to access f2fs_sb_info */ +#define SEGS_TO_BLKS(sbi, segs) \ + ((segs) << (sbi)->log_blocks_per_seg) +#define BLKS_TO_SEGS(sbi, blks) \ + ((blks) >> (sbi)->log_blocks_per_seg) + +#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg) +#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec)) +#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec) + +__printf(3, 4) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__) + +#define f2fs_err_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_info_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__) + #ifdef CONFIG_F2FS_FAULT_INJECTION #define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__, \ __builtin_return_address(0)) @@ -1813,9 +2004,9 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type, atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", - KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type], - func, parent_func); + ffi->inject_count[type]++; + f2fs_info_ratelimited(sbi, "inject %s in %s of %pS", + f2fs_fault_name[type], func, parent_func); return true; } return false; @@ -1875,42 +2066,20 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, /* * Inline functions */ -static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, - const void *address, unsigned int length) -{ - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); - - desc.shash.tfm = sbi->s_chksum_driver; - *(u32 *)desc.ctx = crc; - - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; -} - -static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, - unsigned int length) +static inline u32 __f2fs_crc32(u32 crc, const void *address, + unsigned int length) { - return __f2fs_crc32(sbi, F2FS_SUPER_MAGIC, address, length); + return crc32(crc, address, length); } -static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, - void *buf, size_t buf_size) +static inline u32 f2fs_crc32(const void *address, unsigned int length) { - return f2fs_crc32(sbi, buf, buf_size) == blk_crc; + return __f2fs_crc32(F2FS_SUPER_MAGIC, address, length); } -static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, - const void *address, unsigned int length) +static inline u32 f2fs_chksum(u32 crc, const void *address, unsigned int length) { - return __f2fs_crc32(sbi, crc, address, length); + return __f2fs_crc32(crc, address, length); } static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) @@ -1933,9 +2102,9 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } -static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) +static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio) { - return F2FS_M_SB(page_file_mapping(page)); + return F2FS_M_SB(folio->mapping); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1943,19 +2112,29 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % folio_nr_pages(folio); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct folio *folio) { - return (struct f2fs_node *)page_address(page); + return (struct f2fs_node *)folio_address(folio); } -static inline struct f2fs_inode *F2FS_INODE(struct page *page) +static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio) { - return &((struct f2fs_node *)page_address(page))->i; + return &((struct f2fs_node *)folio_address(folio))->i; } static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) @@ -1993,6 +2172,16 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } +static inline bool is_meta_folio(struct folio *folio) +{ + return folio->mapping == META_MAPPING(F2FS_F_SB(folio)); +} + +static inline bool is_node_folio(struct folio *folio) +{ + return folio->mapping == NODE_MAPPING(F2FS_F_SB(folio)); +} + static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) { return test_bit(type, &sbi->s_flag); @@ -2114,15 +2303,6 @@ static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) return down_read_trylock(&sem->internal_rwsem); } -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) -{ - down_read_nested(&sem->internal_rwsem, subclass); -} -#else -#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) -#endif - static inline void f2fs_up_read(struct f2fs_rwsem *sem) { up_read(&sem->internal_rwsem); @@ -2133,6 +2313,21 @@ static inline void f2fs_down_write(struct f2fs_rwsem *sem) down_write(&sem->internal_rwsem); } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} + +static inline void f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_write_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem) +#endif + static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) { return down_write_trylock(&sem->internal_rwsem); @@ -2146,6 +2341,36 @@ static inline void f2fs_up_write(struct f2fs_rwsem *sem) #endif } +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) +{ + unsigned long flags; + unsigned char *nat_bits; + + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ + + if (lock) + spin_lock_irqsave(&sbi->cp_lock, flags); + __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); + nat_bits = NM_I(sbi)->nat_bits; + NM_I(sbi)->nat_bits = NULL; + if (lock) + spin_unlock_irqrestore(&sbi->cp_lock, flags); + + kvfree(nat_bits); +} + +static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, + struct cp_control *cpc) +{ + bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); + + return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set; +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { f2fs_down_read(&sbi->cp_rwsem); @@ -2210,13 +2435,11 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, +static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi, struct inode *inode, bool cap) { if (!inode) return true; - if (!test_opt(sbi, RESERVE_ROOT)) - return false; if (IS_NOQUOTA(inode)) return true; if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) @@ -2229,11 +2452,32 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; } +static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + block_t avail_user_block_count; + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; + + if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } + + return avail_user_block_count; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { - blkcnt_t diff = 0, release = 0; + long long diff = 0, release = 0; block_t avail_user_block_count; int ret; @@ -2253,35 +2497,27 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); spin_lock(&sbi->stat_lock); - sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks; - if (!__allow_reserved_blocks(sbi, inode, true)) - avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - - if (F2FS_IO_ALIGNED(sbi)) - avail_user_block_count -= sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; - - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - if (avail_user_block_count > sbi->unusable_block_count) - avail_user_block_count -= sbi->unusable_block_count; - else - avail_user_block_count = 0; - } - if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { - diff = sbi->total_valid_block_count - avail_user_block_count; + avail_user_block_count = get_available_block_count(sbi, inode, true); + diff = (long long)sbi->total_valid_block_count + *count - + avail_user_block_count; + if (unlikely(diff > 0)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + release = *count; + goto enospc; + } if (diff > *count) diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); goto enospc; } } + sbi->total_valid_block_count += (block_t)(*count); + spin_unlock(&sbi->stat_lock); if (unlikely(release)) { @@ -2298,21 +2534,14 @@ release_quota: return -ENOSPC; } -__printf(2, 3) -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); - -#define f2fs_err(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) -#define f2fs_warn(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) -#define f2fs_notice(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) -#define f2fs_info(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) -#define f2fs_debug(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) - #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool folio_test_f2fs_##name(const struct folio *folio) \ +{ \ + unsigned long priv = (unsigned long)folio->private; \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + return (priv & v) == v; \ +} \ static inline bool page_private_##name(struct page *page) \ { \ return PagePrivate(page) && \ @@ -2321,6 +2550,17 @@ static inline bool page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void folio_set_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \ + (1UL << PAGE_PRIVATE_##flagname); \ + if (!folio->private) \ + folio_attach_private(folio, (void *)v); \ + else { \ + v |= (unsigned long)folio->private; \ + folio->private = (void *)v; \ + } \ +} \ static inline void set_page_private_##name(struct page *page) \ { \ if (!PagePrivate(page)) \ @@ -2330,6 +2570,16 @@ static inline void set_page_private_##name(struct page *page) \ } #define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void folio_clear_f2fs_##name(struct folio *folio) \ +{ \ + unsigned long v = (unsigned long)folio->private; \ + \ + v &= ~(1UL << PAGE_PRIVATE_##flagname); \ + if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \ + folio_detach_private(folio); \ + else \ + folio->private = (void *)v; \ +} \ static inline void clear_page_private_##name(struct page *page) \ { \ clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ @@ -2340,50 +2590,35 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); -static inline unsigned long get_page_private_data(struct page *page) +static inline unsigned long folio_get_f2fs_data(struct folio *folio) { - unsigned long data = page_private(page); + unsigned long data = (unsigned long)folio->private; if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) return 0; return data >> PAGE_PRIVATE_MAX; } -static inline void set_page_private_data(struct page *page, unsigned long data) -{ - if (!PagePrivate(page)) - attach_page_private(page, (void *)0); - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); - page_private(page) |= data << PAGE_PRIVATE_MAX; -} - -static inline void clear_page_private_data(struct page *page) +static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data) { - page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0); - if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) - detach_page_private(page); -} - -static inline void clear_page_private_all(struct page *page) -{ - clear_page_private_data(page); - clear_page_private_reference(page); - clear_page_private_gcing(page); - clear_page_private_inline(page); + data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX); - f2fs_bug_on(F2FS_P_SB(page), page_private(page)); + if (!folio_test_private(folio)) + folio_attach_private(folio, (void *)data); + else + folio->private = (void *)((unsigned long)folio->private | data); } static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, @@ -2393,8 +2628,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - sbi->total_valid_block_count -= (block_t)count; + if (unlikely(sbi->total_valid_block_count < count)) { + f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u", + sbi->total_valid_block_count, inode->i_ino, count); + sbi->total_valid_block_count = 0; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count -= count; + } if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, @@ -2484,11 +2725,8 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; - unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >> - sbi->log_blocks_per_seg; - - return segs / sbi->segs_per_sec; + return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1, + BLKS_PER_SEC(sbi)); } static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) @@ -2552,7 +2790,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 2) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -2561,7 +2799,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 1) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -2580,7 +2818,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count, user_block_count; + unsigned int valid_node_count, avail_user_node_count; + unsigned int avail_user_block_count; int err; if (is_inode) { @@ -2600,27 +2839,21 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + - sbi->current_reserved_blocks + 1; - - if (!__allow_reserved_blocks(sbi, inode, false)) - valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; - - if (F2FS_IO_ALIGNED(sbi)) - valid_block_count += sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; + valid_block_count = sbi->total_valid_block_count + 1; + avail_user_block_count = get_available_block_count(sbi, inode, + test_opt(sbi, RESERVE_NODE)); - user_block_count = sbi->user_block_count; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - user_block_count -= sbi->unusable_block_count; - - if (unlikely(valid_block_count > user_block_count)) { + if (unlikely(valid_block_count > avail_user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } + avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + if (test_opt(sbi, RESERVE_NODE) && + !__allow_reserved_root(sbi, inode, true)) + avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes; valid_node_count = sbi->total_valid_node_count + 1; - if (unlikely(valid_node_count > sbi->total_node_count)) { + if (unlikely(valid_node_count > avail_user_node_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -2705,65 +2938,75 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) return percpu_counter_sum_positive(&sbi->total_valid_inode_count); } -static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, - pgoff_t index, bool for_write) +static inline struct folio *f2fs_grab_cache_folio(struct address_space *mapping, + pgoff_t index, bool for_write) { - struct page *page; + struct folio *folio; unsigned int flags; if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + fgf_t fgf_flags; + if (!for_write) - page = find_get_page_flags(mapping, index, - FGP_LOCK | FGP_ACCESSED); + fgf_flags = FGP_LOCK | FGP_ACCESSED; else - page = find_lock_page(mapping, index); - if (page) - return page; + fgf_flags = FGP_LOCK; + folio = __filemap_get_folio(mapping, index, fgf_flags, 0); + if (!IS_ERR(folio)) + return folio; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) - return NULL; + return ERR_PTR(-ENOMEM); } if (!for_write) - return grab_cache_page(mapping, index); + return filemap_grab_folio(mapping, index); flags = memalloc_nofs_save(); - page = grab_cache_page_write_begin(mapping, index); + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); memalloc_nofs_restore(flags); - return page; + return folio; } -static inline struct page *f2fs_pagecache_get_page( +static inline struct folio *f2fs_filemap_get_folio( struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) - return NULL; + return ERR_PTR(-ENOMEM); - return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); + return __filemap_get_folio(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_put_page(struct page *page, int unlock) +static inline void f2fs_folio_put(struct folio *folio, bool unlock) { - if (!page) + if (IS_ERR_OR_NULL(folio)) return; if (unlock) { - f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); - unlock_page(page); + f2fs_bug_on(F2FS_F_SB(folio), !folio_test_locked(folio)); + folio_unlock(folio); } - put_page(page); + folio_put(folio); +} + +static inline void f2fs_put_page(struct page *page, bool unlock) +{ + if (!page) + return; + f2fs_folio_put(page_folio(page), unlock); } static inline void f2fs_put_dnode(struct dnode_of_data *dn) { - if (dn->node_page) - f2fs_put_page(dn->node_page, 1); - if (dn->inode_page && dn->node_page != dn->inode_page) - f2fs_put_page(dn->inode_page, 0); - dn->node_page = NULL; - dn->inode_page = NULL; + if (dn->node_folio) + f2fs_folio_put(dn->node_folio, true); + if (dn->inode_folio && dn->node_folio != dn->inode_folio) + f2fs_folio_put(dn->inode_folio, false); + dn->node_folio = NULL; + dn->inode_folio = NULL; } static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, @@ -2814,12 +3057,22 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) return false; } +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + if (sbi->gc_mode == GC_URGENT_HIGH) return true; - if (is_inflight_io(sbi, type)) + if (sbi->bggc_io_aware == AWARE_READ_IO && is_inflight_read_io(sbi)) + return false; + if (sbi->bggc_io_aware == AWARE_ALL_IO && is_inflight_io(sbi, type)) return false; if (sbi->gc_mode == GC_URGENT_MID) @@ -2829,6 +3082,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) (type == DISCARD_TIME || type == GC_TIME)) return true; + if (zoned_gc) + return true; + return f2fs_time_over(sbi, type); } @@ -2841,9 +3097,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) -static inline bool IS_INODE(struct page *page) +static inline bool IS_INODE(const struct folio *folio) { - struct f2fs_node *p = F2FS_NODE(page); + struct f2fs_node *p = F2FS_NODE(folio); return RAW_IS_INODE(p); } @@ -2860,31 +3116,32 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) +static inline unsigned int get_dnode_base(struct inode *inode, + struct folio *node_folio) { - struct f2fs_node *raw_node; - __le32 *addr_array; - int base = 0; - bool is_inode = IS_INODE(node_page); + if (!IS_INODE(node_folio)) + return 0; - raw_node = F2FS_NODE(node_page); + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_folio)->i); +} - if (is_inode) { - if (!inode) - /* from GC path only */ - base = offset_in_addr(&raw_node->i); - else if (f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct folio *node_folio) +{ + return blkaddr_in_node(F2FS_NODE(node_folio)) + + get_dnode_base(inode, node_folio); +} - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[base + offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct folio *node_folio, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_folio) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) { - return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node); + return data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node); } static inline int f2fs_test_bit(unsigned int nr, char *addr) @@ -2961,6 +3218,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) @@ -2976,6 +3234,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { if (S_ISDIR(mode)) @@ -2998,7 +3258,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); @@ -3094,7 +3353,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) static inline void f2fs_i_gc_failures_write(struct inode *inode, unsigned int count) { - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; + F2FS_I(inode)->i_gc_failures = count; f2fs_mark_inode_dirty_sync(inode, true); } @@ -3122,8 +3381,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3144,8 +3401,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3186,26 +3441,21 @@ static inline bool f2fs_need_compress_data(struct inode *inode) return false; } -static inline unsigned int addrs_per_inode(struct inode *inode) +static inline unsigned int addrs_per_page(struct inode *inode, + bool is_inode) { - unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - - get_inline_xattr_addrs(inode); + unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK; - if (!f2fs_compressed_file(inode)) - return addrs; - return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); -} - -static inline unsigned int addrs_per_block(struct inode *inode) -{ - if (!f2fs_compressed_file(inode)) - return DEF_ADDRS_PER_BLOCK; - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); + if (f2fs_compressed_file(inode)) + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); + return addrs; } -static inline void *inline_xattr_addr(struct inode *inode, struct page *page) +static inline +void *inline_xattr_addr(struct inode *inode, const struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(page); + struct f2fs_inode *ri = F2FS_INODE(folio); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)]); @@ -3220,7 +3470,7 @@ static inline int inline_xattr_size(struct inode *inode) /* * Notice: check inline_data flag without inode page lock is unsafe. - * It could change at any time by f2fs_convert_inline_page(). + * It could change at any time by f2fs_convert_inline_folio(). */ static inline int f2fs_has_inline_data(struct inode *inode) { @@ -3232,11 +3482,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); @@ -3257,22 +3502,11 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline bool f2fs_is_first_block_written(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); -} - -static inline bool f2fs_is_drop_cache(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_DROP_CACHE); -} - -static inline void *inline_data_addr(struct inode *inode, struct page *page) +static inline void *inline_data_addr(struct inode *inode, struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(page); - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, folio); - return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); + return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -3303,13 +3537,15 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_is_time_consistent(struct inode *inode) { - struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 ts = inode_get_atime(inode); - if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &ts)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime)) + ts = inode_get_ctime(inode); + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ts)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + ts = inode_get_mtime(inode); + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &ts)) return false; return true; } @@ -3351,17 +3587,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const u8 *name, size_t len) -{ - if (len == 1 && name[0] == '.') - return true; - - if (len == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -3405,6 +3630,14 @@ static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi, return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO); } +static inline void *f2fs_vmalloc(struct f2fs_sb_info *sbi, size_t size) +{ + if (time_to_inject(sbi, FAULT_VMALLOC)) + return NULL; + + return vmalloc(size); +} + static inline int get_extra_isize(struct inode *inode) { return F2FS_I(inode)->i_extra_isize / sizeof(__le32); @@ -3431,7 +3664,7 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ -#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) +#define __is_large_section(sbi) (SEGS_PER_SEC(sbi) > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) @@ -3440,11 +3673,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", blkaddr, type); - f2fs_bug_on(sbi, 1); - } } static inline bool __is_valid_data_blkaddr(block_t blkaddr) @@ -3468,10 +3699,12 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); -int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); + struct dentry *dentry, struct file_kattr *fa); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid); @@ -3481,14 +3714,15 @@ int f2fs_pin_file_control(struct inode *inode, bool inc); * inode.c */ void f2fs_set_inode_flags(struct inode *inode); -bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -void f2fs_update_inode(struct inode *inode, struct page *node_page); +void f2fs_update_inode(struct inode *inode, struct folio *node_folio); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_remove_donate_inode(struct inode *inode); void f2fs_evict_inode(struct inode *inode); void f2fs_handle_failed_inode(struct inode *inode); @@ -3504,36 +3738,50 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, /* * dir.c */ +#if IS_ENABLED(CONFIG_UNICODE) int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname); +void f2fs_free_casefolded_name(struct f2fs_filename *fname); +#else +static inline int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ + return 0; +} + +static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ +} +#endif /* CONFIG_UNICODE */ + int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct f2fs_filename *fname); int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname); void f2fs_free_filename(struct f2fs_filename *fname); struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots); + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); -struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct f2fs_filename *fname, struct page *dpage); +struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, + const struct f2fs_filename *fname, struct folio *dfolio); void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - const struct f2fs_filename *fname, - struct page **res_page); + const struct f2fs_filename *fname, struct folio **res_folio); struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); + const struct qstr *child, struct folio **res_folio); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f); ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, - struct page **page); + struct folio **folio); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode); -bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct folio *folio, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, const struct f2fs_filename *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct fscrypt_str *name, f2fs_hash_t name_hash, @@ -3544,9 +3792,10 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode); -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname); bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) @@ -3564,14 +3813,12 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_dquot_initialize(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); -int f2fs_quota_sync(struct super_block *sb, int type); +int f2fs_do_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context); +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); -void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); @@ -3585,12 +3832,13 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname); * node.c */ struct node_info; +enum node_type; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); -void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); @@ -3603,14 +3851,15 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); int f2fs_truncate_xattr_node(struct inode *inode); int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, unsigned int seq_id); -bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi); int f2fs_remove_inode_page(struct inode *inode); -struct page *f2fs_new_inode_page(struct inode *inode); -struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); +struct folio *f2fs_new_inode_folio(struct inode *inode); +struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs); void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); -struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); -struct page *f2fs_get_node_page_ra(struct page *parent, int start); -int f2fs_move_node_page(struct page *node_page, int gc_type); +struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, + enum node_type node_type); +struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); +struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_move_node_folio(struct folio *node_folio, int gc_type); void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, @@ -3623,12 +3872,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -int f2fs_recover_inline_xattr(struct inode *inode, struct page *page); -int f2fs_recover_xattr_data(struct inode *inode, struct page *page); -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio); +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi); int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); @@ -3647,7 +3895,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); @@ -3661,22 +3910,22 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); -void f2fs_get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir); -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); -struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type); void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); void f2fs_outplace_write_data(struct dnode_of_data *dn, @@ -3690,14 +3939,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type seg_type); +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, block_t blkaddr, unsigned int blkcnt); -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered, bool locked); +void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, + bool ordered, bool locked); +#define f2fs_wait_on_page_writeback(page, type, ordered, locked) \ + f2fs_folio_wait_writeback(page_folio(page), type, ordered, locked) void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len); @@ -3706,17 +3959,24 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi); -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi); +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi); int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); void f2fs_destroy_segment_manager_caches(void); -int f2fs_rw_hint_to_seg_type(enum rw_hint hint); -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno); +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno); + +static inline struct inode *fio_inode(struct f2fs_io_info *fio) +{ + return fio->folio->mapping->host; +} #define DEF_FRAGMENT_SIZE 4 #define MIN_FRAGMENT_SIZE 1 @@ -3734,12 +3994,14 @@ static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason); void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi); -struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_grab_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio_retry(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_tmp_folio(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, @@ -3780,6 +4042,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(const struct folio *folio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, @@ -3787,10 +4050,10 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, + struct inode *inode, struct folio *folio, nid_t ino, enum page_type type); void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, - struct bio **bio, struct page *page); + struct bio **bio, struct folio *folio); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); @@ -3798,20 +4061,20 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); -void f2fs_set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, - pgoff_t *next_pgofs); -struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs); +struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index, bool for_write); -struct page *f2fs_get_new_data_page(struct inode *inode, - struct page *ipage, pgoff_t index, bool new_i_size); +struct folio *f2fs_get_new_data_folio(struct inode *inode, + struct folio *ifolio, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -3819,7 +4082,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, @@ -3828,7 +4091,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); -void f2fs_clear_page_cache_dirty_tag(struct page *page); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); @@ -3843,13 +4106,16 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections); int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age); + unsigned long long age, bool one_time); /* * recovery.c @@ -3863,6 +4129,19 @@ void f2fs_destroy_recovery_cache(void); * debug.c */ #ifdef CONFIG_F2FS_STAT_FS +enum { + DEVSTAT_INUSE, + DEVSTAT_DIRTY, + DEVSTAT_FULL, + DEVSTAT_FREE, + DEVSTAT_PREFREE, + DEVSTAT_MAX, +}; + +struct f2fs_dev_stats { + unsigned int devstats[2][DEVSTAT_MAX]; /* 0: segs, 1: secs */ +}; + struct f2fs_stat_info { struct list_head stat_list; struct f2fs_sb_info *sbi; @@ -3883,11 +4162,12 @@ struct f2fs_stat_info { unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int nquota_files, ndonate_files; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_wb_cp_data, nr_wb_data; int nr_rd_data, nr_rd_node, nr_rd_meta; int nr_dio_read, nr_dio_write; unsigned int io_skip_bggc, other_skip_bggc; @@ -3907,11 +4187,14 @@ struct f2fs_stat_info { int rsvd_segs, overp_segs; int dirty_count, node_pages, meta_pages, compress_pages; int compress_page_hit; - int prefree_count, call_count, cp_count, bg_cp_count; - int tot_segs, node_segs, data_segs, free_segs, free_secs; - int bg_node_segs, bg_data_segs; + int prefree_count, free_segs, free_secs; + int cp_call_count[MAX_CALL_TYPE], cp_count; + int gc_call_count[MAX_CALL_TYPE]; + int gc_segs[2][2]; + int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + int blkoff[NR_CURSEG_TYPE]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; @@ -3924,6 +4207,7 @@ struct f2fs_stat_info { unsigned int block_count[2]; unsigned int inplace_count; unsigned long long base_mem, cache_mem, page_mem; + struct f2fs_dev_stats *dev_stats; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -3931,10 +4215,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) return (struct f2fs_stat_info *)sbi->stat_info; } -#define stat_inc_cp_count(si) ((si)->cp_count++) -#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) -#define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(si) ((si)->bg_gc++) +#define stat_inc_cp_call_count(sbi, foreground) \ + atomic_inc(&sbi->cp_call_count[(foreground)]) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4019,18 +4302,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - si->tot_segs++; \ - if ((type) == SUM_TYPE_DATA) { \ - si->data_segs++; \ - si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ - } else { \ - si->node_segs++; \ - si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ - } \ - } while (0) +#define stat_inc_gc_call_count(sbi, foreground) \ + (F2FS_STAT(sbi)->gc_call_count[(foreground)]++) +#define stat_inc_gc_sec_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_secs[(type)][(gc_type)]++) +#define stat_inc_gc_seg_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_segs[(type)][(gc_type)]++) #define stat_inc_tot_blk_count(si, blks) \ ((si)->tot_blks += (blks)) @@ -4057,10 +4334,8 @@ void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else -#define stat_inc_cp_count(si) do { } while (0) -#define stat_inc_bg_cp_count(si) do { } while (0) -#define stat_inc_call_count(si) do { } while (0) -#define stat_inc_bggc_count(si) do { } while (0) +#define stat_inc_cp_call_count(sbi, foreground) do { } while (0) +#define stat_inc_cp_count(sbi) do { } while (0) #define stat_io_skip_bggc_count(sbi) do { } while (0) #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) @@ -4088,7 +4363,9 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_call_count(sbi, foreground) do { } while (0) +#define stat_inc_gc_sec_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_seg_count(sbi, type, gc_type) do { } while (0) #define stat_inc_tot_blk_count(si, blks) do { } while (0) #define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) @@ -4116,27 +4393,26 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio); bool f2fs_may_inline_dentry(struct inode *inode); -void f2fs_do_read_inline_data(struct page *page, struct page *ipage); -void f2fs_truncate_inline_inode(struct inode *inode, - struct page *ipage, u64 from); -int f2fs_read_inline_data(struct inode *inode, struct page *page); -int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); +void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); +void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, + u64 from); +int f2fs_read_inline_data(struct inode *inode, struct folio *folio); +int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); -int f2fs_write_inline_data(struct inode *inode, struct page *page); -int f2fs_recover_inline_data(struct inode *inode, struct page *npage); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); +int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - const struct f2fs_filename *fname, - struct page **res_page); + const struct f2fs_filename *fname, struct folio **res_folio, + bool use_hash); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, - struct page *ipage); + struct folio *ifolio); int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, - struct page *page, struct inode *dir, - struct inode *inode); + struct folio *folio, struct inode *dir, struct inode *inode); bool f2fs_empty_inline_dir(struct inode *dir); int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr); @@ -4151,13 +4427,15 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, struct shrink_control *sc); unsigned long f2fs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc); +unsigned int f2fs_donate_files(void); +void f2fs_reclaim_caches(unsigned int reclaim_caches_kb); void f2fs_join_shrinker(struct f2fs_sb_info *sbi); void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode); +bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); @@ -4167,7 +4445,7 @@ int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); /* read extent cache ops */ -void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage); +void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio); bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei); bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, @@ -4228,47 +4506,64 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + +static inline bool f2fs_meta_inode_gc_required(struct inode *inode) +{ + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); +} + /* * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION -bool f2fs_is_compressed_page(struct page *page); -struct page *f2fs_compress_control_page(struct page *page); +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; +bool f2fs_is_compressed_page(struct folio *folio); +struct folio *f2fs_compress_control_folio(struct folio *folio); int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); -void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio); bool f2fs_is_compress_backend_ready(struct inode *inode); bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); -void f2fs_end_read_compressed_page(struct page *page, bool failed, +void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); int f2fs_write_multi_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write); + struct readahead_control *rac, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); -void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +void f2fs_put_folio_dic(struct folio *folio, bool in_task); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4279,10 +4574,9 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr); -void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, - nid_t ino, block_t blkaddr); -bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len); +bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr); void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); #define inc_compr_inode_stat(inode) \ @@ -4298,7 +4592,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); sbi->compr_saved_block += diff; \ } while (0) #else -static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; } static inline bool f2fs_is_compress_backend_ready(struct inode *inode) { if (!f2fs_compressed_file(inode)) @@ -4307,7 +4601,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) return false; } static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } -static inline struct page *f2fs_compress_control_page(struct page *page) +static inline struct folio *f2fs_compress_control_folio(struct folio *folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); @@ -4316,16 +4610,17 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; } static inline void f2fs_destroy_compress_mempool(void) { } static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) { } -static inline void f2fs_end_read_compressed_page(struct page *page, +static inline void f2fs_end_read_compressed_page(struct folio *folio, bool failed, block_t blkaddr, bool in_task) { WARN_ON_ONCE(1); } -static inline void f2fs_put_page_dic(struct page *page, bool in_task) +static inline void f2fs_put_folio_dic(struct folio *folio, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } @@ -4333,15 +4628,19 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } -static inline void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, - block_t blkaddr) { } -static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, nid_t ino, block_t blkaddr) { } -static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, block_t blkaddr) { return false; } +static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { } +static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, + struct folio *folio, block_t blkaddr) { return false; } static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, @@ -4352,22 +4651,18 @@ static inline int set_compress_context(struct inode *inode) { #ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - F2FS_I(inode)->i_compress_algorithm = - F2FS_OPTION(sbi).compress_algorithm; - F2FS_I(inode)->i_log_cluster_size = - F2FS_OPTION(sbi).compress_log_size; - F2FS_I(inode)->i_compress_flag = - F2FS_OPTION(sbi).compress_chksum ? - BIT(COMPRESS_CHKSUM) : 0; - F2FS_I(inode)->i_cluster_size = - BIT(F2FS_I(inode)->i_log_cluster_size); - if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 || - F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) && + fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm; + fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? + BIT(COMPRESS_CHKSUM) : 0; + fi->i_cluster_size = BIT(fi->i_log_cluster_size); + if ((fi->i_compress_algorithm == COMPRESS_LZ4 || + fi->i_compress_algorithm == COMPRESS_ZSTD) && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = - F2FS_OPTION(sbi).compress_level; - F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); @@ -4382,15 +4677,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&fi->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&fi->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&fi->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&fi->i_sem); return true; } @@ -4414,17 +4718,39 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); +F2FS_FEATURE_FUNCS(packed_ssa, PACKED_SSA); #ifdef CONFIG_BLK_DEV_ZONED -static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, - block_t blkaddr) +static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, + unsigned int zone) { - unsigned int zno = blkaddr / sbi->blocks_per_blkz; + return test_bit(zone, FDEV(devi).blkz_seq); +} - return test_bit(zno, FDEV(devi).blkz_seq); +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) +{ + return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz); } #endif +static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi, + struct block_device *bdev) +{ + int i; + + if (!f2fs_is_multi_device(sbi)) + return 0; + + for (i = 0; i < sbi->s_ndevs; i++) + if (FDEV(i).bdev == bdev) + return i; + + WARN_ON(1); + return -1; +} + static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { return f2fs_sb_has_blkzoned(sbi); @@ -4448,6 +4774,18 @@ static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) return false; } +static inline unsigned int f2fs_hw_discard_granularity(struct f2fs_sb_info *sbi) +{ + int i = 1; + unsigned int discard_granularity = bdev_discard_granularity(sbi->sb->s_bdev); + + if (f2fs_is_multi_device(sbi)) + for (; i < sbi->s_ndevs && !bdev_is_zoned(FDEV(i).bdev); i++) + discard_granularity = max_t(unsigned int, discard_granularity, + bdev_discard_granularity(FDEV(i).bdev)); + return discard_granularity; +} + static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) { return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || @@ -4477,6 +4815,33 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (f2fs_sb_has_blkzoned(sbi)) { +#ifdef CONFIG_BLK_DEV_ZONED + int devi = f2fs_target_device_index(sbi, blkaddr); + + if (!bdev_is_zoned(FDEV(devi).bdev)) + return false; + + if (f2fs_is_multi_device(sbi)) { + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + + return f2fs_blkz_is_seq(sbi, devi, blkaddr); +#else + return false; +#endif + } + return false; +} + static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) { return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; @@ -4485,7 +4850,8 @@ static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || - f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode)) + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode) || + f2fs_is_mmap_file(inode)) return false; return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } @@ -4527,10 +4893,15 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) } #ifdef CONFIG_F2FS_FAULT_INJECTION -extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type); +extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type, enum fault_option fo); #else -#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) +static inline int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, + unsigned long rate, unsigned long type, + enum fault_option fo) +{ + return 0; +} #endif static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) @@ -4551,15 +4922,38 @@ static inline bool f2fs_block_unit_discard(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK; } -static inline void f2fs_io_schedule_timeout(long timeout) +static inline void __f2fs_schedule_timeout(long timeout, bool io) { set_current_state(TASK_UNINTERRUPTIBLE); - io_schedule_timeout(timeout); + if (io) + io_schedule_timeout(timeout); + else + schedule_timeout(timeout); } -static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, - enum page_type type) +#define f2fs_io_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, true) +#define f2fs_schedule_timeout(timeout) \ + __f2fs_schedule_timeout(timeout, false) + +static inline void f2fs_io_schedule_timeout_killable(long timeout) +{ + while (timeout) { + if (fatal_signal_pending(current)) + return; + set_current_state(TASK_UNINTERRUPTIBLE); + io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); + if (timeout <= DEFAULT_SCHEDULE_TIMEOUT) + return; + timeout -= DEFAULT_SCHEDULE_TIMEOUT; + } +} + +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) { + pgoff_t ofs = folio->index; + if (unlikely(f2fs_cp_error(sbi))) return; @@ -4577,6 +4971,39 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); } +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int cnt) +{ + bool need_submit = false; + int i = 0; + + do { + struct folio *folio; + + folio = filemap_get_folio(META_MAPPING(sbi), blkaddr + i); + if (!IS_ERR(folio)) { + if (folio_test_writeback(folio)) + need_submit = true; + f2fs_folio_put(folio, false); + } + } while (++i < cnt && !need_submit); + + if (need_submit) + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode, + NULL, 0, DATA); + + truncate_inode_pages_range(META_MAPPING(sbi), + F2FS_BLK_TO_BYTES((loff_t)blkaddr), + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1))); +} + +static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) +{ + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); + f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce9d567cd5fe..d7047ca6b98d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -8,7 +8,6 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/stat.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/falloc.h> @@ -36,57 +35,80 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, + loff_t new_size, bool lock) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + if (mapping_empty(inode->i_mapping)) + return; + + if (lock) + filemap_invalidate_lock(inode->i_mapping); + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_inode_pages_range(inode->i_mapping, old_size, new_size); + if (lock) + filemap_invalidate_unlock(inode->i_mapping); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); + vm_flags_t flags = vmf->vma->vm_flags; vm_fault_t ret; ret = filemap_fault(vmf); - if (!ret) + if (ret & VM_FAULT_LOCKED) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_MAPPED_READ_IO, F2FS_BLKSIZE); - trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); + trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); return ret; } static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - bool need_alloc = true; + bool need_alloc = !f2fs_is_pinned_file(inode); int err = 0; + vm_fault_t ret; if (unlikely(IS_IMMUTABLE(inode))) return VM_FAULT_SIGBUS; - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) - return VM_FAULT_SIGBUS; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + err = -EIO; + goto out; + } if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; - goto err; + goto out; } if (!f2fs_is_checkpoint_ready(sbi)) { err = -ENOSPC; - goto err; + goto out; } err = f2fs_convert_inline_inode(inode); if (err) - goto err; + goto out; #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - int ret = f2fs_is_compressed_cluster(inode, page->index); + int ret = f2fs_is_compressed_cluster(inode, folio->index); if (ret < 0) { err = ret; - goto err; + goto out; } else if (ret) { need_alloc = false; } @@ -100,36 +122,38 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page))) { - unlock_page(page); + + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || + !folio_test_uptodate(folio))) { + folio_unlock(folio); err = -EFAULT; goto out_sem; } + set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_block_locked(&dn, page->index); - } - -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (!need_alloc) { - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_block_locked(&dn, folio->index); + } else { + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); f2fs_put_dnode(&dn); + if (f2fs_is_pinned_file(inode) && + !__is_valid_data_blkaddr(dn.data_blkaddr)) + err = -EIO; } -#endif + if (err) { - unlock_page(page); + folio_unlock(folio); goto out_sem; } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_folio_wait_writeback(folio, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -137,29 +161,31 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto out_sem; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_SHIFT) > + if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { loff_t offset; offset = i_size_read(inode) & ~PAGE_MASK; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); } - set_page_dirty(page); + folio_mark_dirty(folio); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); - trace_f2fs_vm_page_mkwrite(page, DATA); out_sem: filemap_invalidate_unlock_shared(inode->i_mapping); sb_end_pagefault(inode->i_sb); -err: - return vmf_fs_error(err); +out: + ret = vmf_fs_error(err); + + trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); + return ret; } static const struct vm_operations_struct f2fs_file_vm_ops = { @@ -180,7 +206,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - *pino = parent_ino(dentry); + *pino = d_parent_ino(dentry); dput(dentry); return 1; } @@ -213,18 +239,22 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); bool ret = false; /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) + if ((!IS_ERR(i) && folio_test_dirty(i)) || + f2fs_need_inode_block_update(sbi, ino)) ret = true; - f2fs_put_page(i, 0); + f2fs_folio_put(i, false); return ret; } @@ -253,7 +283,6 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_reclaim = 0, }; unsigned int seq_id = 0; @@ -368,8 +397,7 @@ sync_nodes: f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) || - (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi))) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); @@ -389,9 +417,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return f2fs_do_sync_file(file, start, end, datasync, false); } -static bool __found_offset(struct address_space *mapping, block_t blkaddr, - pgoff_t index, int whence) +static bool __found_offset(struct address_space *mapping, + struct dnode_of_data *dn, pgoff_t index, int whence) { + block_t blkaddr = f2fs_data_blkaddr(dn); + struct inode *inode = mapping->host; + bool compressed_cluster = false; + + if (f2fs_compressed_file(inode)) { + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, + ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); + + compressed_cluster = first_blkaddr == COMPRESS_ADDR; + } + switch (whence) { case SEEK_DATA: if (__is_valid_data_blkaddr(blkaddr)) @@ -399,8 +438,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr, if (blkaddr == NEW_ADDR && xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) return true; + if (compressed_cluster) + return true; break; case SEEK_HOLE: + if (compressed_cluster) + return false; if (blkaddr == NULL_ADDR) return true; break; @@ -411,14 +454,14 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr, static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); struct dnode_of_data dn; pgoff_t pgofs, end_offset; loff_t data_ofs = offset; loff_t isize; int err = 0; - inode_lock(inode); + inode_lock_shared(inode); isize = i_size_read(inode); if (offset >= isize) @@ -452,7 +495,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -469,7 +512,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; } - if (__found_offset(file->f_mapping, blkaddr, + if (__found_offset(file->f_mapping, &dn, pgofs, whence)) { f2fs_put_dnode(&dn); goto found; @@ -483,20 +526,17 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) found: if (whence == SEEK_HOLE && data_ofs > isize) data_ofs = isize; - inode_unlock(inode); + inode_unlock_shared(inode); return vfs_setpos(file, data_ofs, maxbytes); fail: - inode_unlock(inode); + inode_unlock_shared(inode); return -ENXIO; } static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; - - if (f2fs_compressed_file(inode)) - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); switch (whence) { case SEEK_SET: @@ -514,8 +554,9 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } -static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int f2fs_file_mmap_prepare(struct vm_area_desc *desc) { + struct file *file = desc->file; struct inode *inode = file_inode(file); if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) @@ -525,11 +566,54 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return -EOPNOTSUPP; file_accessed(file); - vma->vm_ops = &f2fs_file_vm_ops; + desc->vm_ops = &f2fs_file_vm_ops; + + f2fs_down_read(&F2FS_I(inode)->i_sem); set_inode_flag(inode, FI_MMAP_FILE); + f2fs_up_read(&F2FS_I(inode)->i_sem); + return 0; } +static int finish_preallocate_blocks(struct inode *inode) +{ + int ret = 0; + bool opened; + + f2fs_down_read(&F2FS_I(inode)->i_sem); + opened = is_inode_flag_set(inode, FI_OPENED_FILE); + f2fs_up_read(&F2FS_I(inode)->i_sem); + if (opened) + return 0; + + inode_lock(inode); + if (is_inode_flag_set(inode, FI_OPENED_FILE)) + goto out_unlock; + + if (!file_should_truncate(inode)) + goto out_update; + + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + + truncate_setsize(inode, i_size_read(inode)); + ret = f2fs_truncate(inode); + + filemap_invalidate_unlock(inode->i_mapping); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (ret) + goto out_unlock; + + file_dont_truncate(inode); +out_update: + f2fs_down_write(&F2FS_I(inode)->i_sem); + set_inode_flag(inode, FI_OPENED_FILE); + f2fs_up_write(&F2FS_I(inode)->i_sem); +out_unlock: + inode_unlock(inode); + return ret; +} + static int f2fs_file_open(struct inode *inode, struct file *filp) { int err = fscrypt_file_open(inode, filp); @@ -544,29 +628,33 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; - return dquot_file_open(inode, filp); + err = dquot_file_open(inode, filp); + if (err) + return err; + + err = finish_preallocate_blocks(inode); + if (!err) + atomic_inc(&F2FS_I(inode)->open_count); + return err; } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; - int base = 0; bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); + block_t blkstart; + int blklen = 0; - if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) - base = get_extra_isize(dn->inode); - - raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + base + ofs; + addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; + blkstart = le32_to_cpu(*addr); /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { @@ -582,28 +670,44 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) } if (blkaddr == NULL_ADDR) - continue; + goto next; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE)) - continue; + if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) + goto next; + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) + goto next; if (compressed_cluster) valid_blocks++; } - if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); - - f2fs_invalidate_blocks(sbi, blkaddr); + if (blkstart + blklen == blkaddr) { + blklen++; + } else { + f2fs_invalidate_blocks(sbi, blkstart, blklen); + blkstart = blkaddr; + blklen = 1; + } if (!released || blkaddr != COMPRESS_ADDR) nr_free++; + + continue; + +next: + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + + blkstart = le32_to_cpu(*(addr + 1)); + blklen = 0; } + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + if (compressed_cluster) f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); @@ -613,7 +717,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_age_extent_cache_range(dn, fofs, len); @@ -632,31 +736,33 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, loff_t offset = from & (PAGE_SIZE - 1); pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; if (!offset && !cache_only) return 0; if (cache_only) { - page = find_lock_page(mapping, index); - if (page && PageUptodate(page)) + folio = filemap_lock_folio(mapping, index); + if (IS_ERR(folio)) + return 0; + if (folio_test_uptodate(folio)) goto truncate_out; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } - page = f2fs_get_lock_data_page(inode, index, true); - if (IS_ERR(page)) - return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, index, true); + if (IS_ERR(folio)) + return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true, true); - zero_user(page, offset, PAGE_SIZE - offset); + f2fs_folio_wait_writeback(folio, DATA, true, true); + folio_zero_segment(folio, offset, folio_size(folio)); /* An encrypted inode should have a key and truncate the last page. */ f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); if (!cache_only) - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); return 0; } @@ -666,11 +772,16 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; - struct page *ipage; + struct folio *ifolio; bool truncate_page = false; trace_f2fs_truncate_blocks_enter(inode, from); + if (IS_DEVICE_ALIASING(inode) && from) { + err = -EINVAL; + goto out_err; + } + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= max_file_blocks(inode)) @@ -679,20 +790,33 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) if (lock) f2fs_lock_op(sbi); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); + goto out; + } + + if (IS_DEVICE_ALIASING(inode)) { + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_info ei = et->largest; + + f2fs_invalidate_blocks(sbi, ei.blk, ei.len); + + dec_valid_block_count(sbi, inode, ei.len); + f2fs_update_time(sbi, REQ_TIME); + + f2fs_folio_put(ifolio, true); goto out; } if (f2fs_has_inline_data(inode)) { - f2fs_truncate_inline_inode(inode, ipage, from); - f2fs_put_page(ipage, 1); + f2fs_truncate_inline_inode(inode, ifolio, from); + f2fs_folio_put(ifolio, true); truncate_page = true; goto out; } - set_new_dnode(&dn, inode, ipage, NULL, 0); + set_new_dnode(&dn, inode, ifolio, NULL, 0); err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) @@ -700,12 +824,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(dn.node_page, inode); + count = ADDRS_PER_PAGE(dn.node_folio, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); - if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + if (dn.ofs_in_node || IS_INODE(dn.node_folio)) { f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } @@ -720,7 +844,7 @@ free_partial: /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from, truncate_page); - +out_err: trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -786,15 +910,23 @@ int f2fs_truncate(struct inode *inode) /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); - if (err) + if (err) { + /* + * Always truncate page #0 to avoid page cache + * leak in evict() path. + */ + truncate_inode_pages_range(inode->i_mapping, + F2FS_BLK_TO_BYTES(0), + F2FS_BLK_END_BYTES(0)); return err; + } } err = f2fs_truncate_blocks(inode, i_size_read(inode), true); if (err) return err; - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); f2fs_mark_inode_dirty_sync(inode, false); return 0; } @@ -809,6 +941,12 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return true; if (f2fs_compressed_file(inode)) return true; + /* + * only force direct read to use buffered IO, for direct write, + * it expects inline data conversion before committing IO. + */ + if (f2fs_has_inline_data(inode) && rw == READ) + return true; /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) @@ -817,9 +955,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE)) - return true; - if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi)) + if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && + !f2fs_is_pinned_file(inode)) return true; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) return true; @@ -901,17 +1038,15 @@ static void __setattr_copy(struct mnt_idmap *idmap, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; + inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); if (ia_valid & ATTR_CTIME) inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); - if (!vfsgid_in_group_p(vfsgid) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; set_acl_inode(inode, mode); } @@ -924,23 +1059,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (unlikely(IS_IMMUTABLE(inode))) - return -EPERM; - - if (unlikely(IS_APPEND(inode) && - (attr->ia_valid & (ATTR_MODE | ATTR_UID | - ATTR_GID | ATTR_TIMES_SET)))) - return -EPERM; - - if ((attr->ia_valid & ATTR_SIZE) && - !f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; - err = setattr_prepare(idmap, dentry, attr); if (err) return err; @@ -953,6 +1078,35 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (attr->ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + + if ((attr->ia_valid & ATTR_SIZE)) { + if (!f2fs_is_compress_backend_ready(inode) || + IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(fi->i_cluster_size))) + return -EINVAL; + /* + * To prevent scattered pin block generation, we don't allow + * smaller/equal size unaligned truncation for pinned file. + * We only support overwrite IO to pinned file, so don't + * care about larger size truncation. + */ + if (f2fs_is_pinned_file(inode) && + attr->ia_size <= i_size_read(inode) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi)))) + return -EINVAL; + } + if (is_quota_modification(idmap, inode, attr)) { err = f2fs_dquot_initialize(inode); if (err) @@ -960,12 +1114,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); err = dquot_transfer(idmap, inode, attr); if (err) { - set_sbi_flag(F2FS_I_SB(inode), - SBI_QUOTA_NEED_REPAIR); - f2fs_unlock_op(F2FS_I_SB(inode)); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(sbi); return err; } /* @@ -975,7 +1128,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); } if (attr->ia_valid & ATTR_SIZE) { @@ -991,9 +1144,18 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size, false); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1003,14 +1165,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * larger than i_size. */ filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; - spin_lock(&F2FS_I(inode)->i_size_lock); - inode->i_mtime = inode_set_ctime_current(inode); - F2FS_I(inode)->last_disk_size = i_size_read(inode); - spin_unlock(&F2FS_I(inode)->i_size_lock); + spin_lock(&fi->i_size_lock); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + fi->last_disk_size = i_size_read(inode); + spin_unlock(&fi->i_size_lock); } __setattr_copy(idmap, inode, attr); @@ -1020,7 +1182,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) - inode->i_mode = F2FS_I(inode)->i_acl_mode; + inode->i_mode = fi->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); } } @@ -1029,7 +1191,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); return err; } @@ -1049,7 +1211,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, loff_t start, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct folio *folio; if (!len) return 0; @@ -1057,16 +1219,16 @@ static int fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - page = f2fs_get_new_data_page(inode, NULL, index, false); + folio = f2fs_get_new_data_folio(inode, NULL, index, false); f2fs_unlock_op(sbi); - if (IS_ERR(page)) - return PTR_ERR(page); + if (IS_ERR(folio)) + return PTR_ERR(folio); - f2fs_wait_on_page_writeback(page, DATA, true, true); - zero_user(page, start, len); - set_page_dirty(page); - f2fs_put_page(page, 1); + f2fs_folio_wait_writeback(folio, DATA, true, true); + folio_zero_range(folio, start, len); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); return 0; } @@ -1089,7 +1251,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); @@ -1112,6 +1274,8 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; + f2fs_zero_post_eof_page(inode, offset + len, true); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1184,7 +1348,7 @@ next_dnode: goto next; } - done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = f2fs_data_blkaddr(&dn); @@ -1193,7 +1357,6 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; } @@ -1233,7 +1396,7 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr, 1); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1274,7 +1437,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } ilen = min((pgoff_t) - ADDRS_PER_PAGE(dn.node_page, dst_inode) - + ADDRS_PER_PAGE(dn.node_folio, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = f2fs_data_blkaddr(&dn); @@ -1299,22 +1462,26 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_dnode(&dn); } else { - struct page *psrc, *pdst; + struct folio *fsrc, *fdst; - psrc = f2fs_get_lock_data_page(src_inode, + fsrc = f2fs_get_lock_data_folio(src_inode, src + i, true); - if (IS_ERR(psrc)) - return PTR_ERR(psrc); - pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, + if (IS_ERR(fsrc)) + return PTR_ERR(fsrc); + fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, true); - if (IS_ERR(pdst)) { - f2fs_put_page(psrc, 1); - return PTR_ERR(pdst); + if (IS_ERR(fdst)) { + f2fs_folio_put(fsrc, true); + return PTR_ERR(fdst); } - memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); - set_page_dirty(pdst); - f2fs_put_page(pdst, 1); - f2fs_put_page(psrc, 1); + + f2fs_folio_wait_writeback(fdst, DATA, true, true); + + memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); + folio_mark_dirty(fdst); + folio_set_f2fs_gcing(fdst); + f2fs_folio_put(fdst, true); + f2fs_folio_put(fsrc, true); ret = f2fs_truncate_hole(src_inode, src + i, src + i + 1); @@ -1392,6 +1559,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len, false); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1478,17 +1647,18 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); break; } - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); + f2fs_set_data_blkaddr(dn, NEW_ADDR); } - f2fs_update_read_extent_cache_range(dn, start, 0, index - start); - f2fs_update_age_extent_cache_range(dn, start, index - start); + if (index > start) { + f2fs_update_read_extent_cache_range(dn, start, 0, + index - start); + f2fs_update_age_extent_cache_range(dn, start, index - start); + } return ret; } @@ -1515,6 +1685,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + f2fs_zero_post_eof_page(inode, offset + len, true); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1562,7 +1734,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, goto out; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); end = min(pg_end, end_offset - dn.ofs_in_node + index); ret = f2fs_do_zero_range(&dn, index, end); @@ -1646,6 +1818,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len, false); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1663,10 +1837,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) } filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (ret) + return ret; /* write out all moved pages, if possible */ filemap_invalidate_lock(mapping); - filemap_write_and_wait_range(mapping, offset, LLONG_MAX); + ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); filemap_invalidate_unlock(mapping); @@ -1701,6 +1877,8 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + f2fs_zero_post_eof_page(inode, offset + len, true); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -1721,19 +1899,36 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, map.m_len = sec_blks; next_alloc: + f2fs_down_write(&sbi->pin_sem); + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (has_not_enough_free_secs(sbi, 0, 0)) { + f2fs_up_write(&sbi->pin_sem); + err = -ENOSPC; + f2fs_warn_ratelimited(sbi, + "ino:%lu, start:%lu, end:%lu, need to trigger GC to " + "reclaim enough free segment when checkpoint is enabled", + inode->i_ino, pg_start, pg_end); + goto out_err; + } + } + if (has_not_enough_free_secs(sbi, 0, - GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + sbi->reserved_pin_section)) { f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); - if (err && err != -ENODATA) + if (err && err != -ENODATA) { + f2fs_up_write(&sbi->pin_sem); goto out_err; + } } - f2fs_down_write(&sbi->pin_sem); - - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + err = f2fs_allocate_pinning_section(sbi); + if (err) { + f2fs_up_write(&sbi->pin_sem); + goto out_err; + } map.m_seg_type = CURSEG_COLD_DATA_PINNED; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); @@ -1788,7 +1983,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ @@ -1799,15 +1994,6 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - /* - * Pinned file should not support partial truncation since the block - * can be used by applications. - */ - if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && - (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | - FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) - return -EOPNOTSUPP; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1815,10 +2001,27 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + /* + * Pinned file should not support partial truncation since the block + * can be used by applications. + */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file_modified(file); if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; @@ -1835,7 +2038,7 @@ static long f2fs_fallocate(struct file *file, int mode, } if (!ret) { - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); f2fs_mark_inode_dirty_sync(inode, false); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1849,6 +2052,9 @@ out: static int f2fs_release_file(struct inode *inode, struct file *filp) { + if (atomic_dec_and_test(&F2FS_I(inode)->open_count)) + f2fs_remove_donate_inode(inode); + /* * f2fs_release_file is called at every close calls. So we should * not drop any inmemory pages by close called by other process. @@ -1919,12 +2125,20 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) int err = f2fs_convert_inline_inode(inode); if (err) return err; - if (!f2fs_may_compress(inode)) - return -EINVAL; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + + f2fs_down_write(&fi->i_sem); + if (!f2fs_may_compress(inode) || + atomic_read(&fi->writeback) || + (S_ISREG(inode->i_mode) && + F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&fi->i_sem); return -EINVAL; - if (set_compress_context(inode)) - return -EOPNOTSUPP; + } + err = set_compress_context(inode); + f2fs_up_write(&fi->i_sem); + + if (err) + return err; } } @@ -2040,10 +2254,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2059,7 +2275,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) inode_lock(inode); - if (!f2fs_disable_compressed_file(inode)) { + if (!f2fs_disable_compressed_file(inode) || + f2fs_is_pinned_file(inode)) { ret = -EINVAL; goto out; } @@ -2072,6 +2289,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[READ]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2081,37 +2299,33 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); + if (ret) + goto out_unlock; set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; } f2fs_write_inode(inode, NULL); @@ -2130,7 +2344,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) } f2fs_i_size_write(fi->cow_inode, isize); +out_unlock: + f2fs_up_write(&fi->i_gc_rwsem[READ]); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (ret) + goto out; f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; @@ -2148,6 +2366,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2180,6 +2401,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2198,46 +2422,28 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) return ret; } -static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly, bool need_lock) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - __u32 in; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__u32 __user *)arg)) - return -EFAULT; - - if (in != F2FS_GOING_DOWN_FULLSYNC) { - ret = mnt_want_write_file(filp); - if (ret) { - if (ret == -EROFS) { - ret = 0; - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_SHUTDOWN); - trace_f2fs_shutdown(sbi, in, ret); - } - return ret; - } - } - - switch (in) { + switch (flag) { case F2FS_GOING_DOWN_FULLSYNC: - ret = freeze_bdev(sb->s_bdev); + ret = bdev_freeze(sb->s_bdev); if (ret) goto out; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - thaw_bdev(sb->s_bdev); + bdev_thaw(sb->s_bdev); break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); - if (ret) + if (ret) { + if (ret == -EIO) + ret = 0; goto out; + } f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: @@ -2253,24 +2459,128 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) set_sbi_flag(sbi, SBI_IS_DIRTY); /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); + if (ret == -EIO) + ret = 0; goto out; default: ret = -EINVAL; goto out; } + if (readonly) + goto out; + + /* + * grab sb->s_umount to avoid racing w/ remount() and other shutdown + * paths. + */ + if (need_lock) + down_write(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_write(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: - if (in != F2FS_GOING_DOWN_FULLSYNC) + + trace_f2fs_shutdown(sbi, flag, ret); + + return ret; +} + +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + __u32 in; + int ret; + bool need_drop = false, readonly = false; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) { + if (ret != -EROFS) + return ret; + + /* fallback to nosync shutdown for readonly fs */ + in = F2FS_GOING_DOWN_NOSYNC; + readonly = true; + } else { + need_drop = true; + } + } + + ret = f2fs_do_shutdown(sbi, in, readonly, true); + + if (need_drop) mnt_drop_write_file(filp); - trace_f2fs_shutdown(sbi, in, ret); + return ret; +} + +static int f2fs_keep_noreuse_range(struct inode *inode, + loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); + u64 start, end; + int ret = 0; + + if (!S_ISREG(inode->i_mode)) + return 0; + + if (offset >= max_bytes || len > max_bytes || + (offset + len) > max_bytes) + return 0; + + start = offset >> PAGE_SHIFT; + end = DIV_ROUND_UP(offset + len, PAGE_SIZE); + + inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + inode_unlock(inode); + return 0; + } + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + /* let's remove the range, if len = 0 */ + if (!len) { + if (!list_empty(&F2FS_I(inode)->gdonate_list)) { + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) + ret = -EALREADY; + else + set_inode_flag(inode, FI_DONATE_FINISHED); + } else + ret = -ENOENT; + } else { + if (list_empty(&F2FS_I(inode)->gdonate_list)) { + list_add_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + sbi->donate_files++; + } else { + list_move_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + } + F2FS_I(inode)->donate_start = start; + F2FS_I(inode)->donate_end = end - 1; + clear_inode_flag(inode, FI_DONATE_FINISHED); + } + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + inode_unlock(inode); return ret; } @@ -2278,14 +2588,14 @@ out: static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct super_block *sb = inode->i_sb; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct fstrim_range range; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!f2fs_hw_support_discard(F2FS_SB(sb))) + if (!f2fs_hw_support_discard(sbi)) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, @@ -2296,9 +2606,9 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (ret) return ret; - range.minlen = max((unsigned int)range.minlen, - bdev_discard_granularity(sb->s_bdev)); - ret = f2fs_trim_fs(F2FS_SB(sb), &range); + range.minlen = max_t(unsigned int, range.minlen, + f2fs_hw_discard_granularity(sbi)); + ret = f2fs_trim_fs(sbi, &range); mnt_drop_write_file(filp); if (ret < 0) return ret; @@ -2306,7 +2616,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return 0; } @@ -2323,13 +2633,14 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + int ret; if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; + ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - - return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); + return ret; } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) @@ -2465,6 +2776,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) gc_control.init_gc_type = sync ? FG_GC : BG_GC; gc_control.err_gc_skipped = sync; + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); out: mnt_drop_write_file(filp); @@ -2508,6 +2820,7 @@ do_more: } gc_control.victim_segno = GET_SEGNO(sbi, range->start); + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); if (ret) { if (ret == -EBUSY) @@ -2569,20 +2882,21 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, .m_may_create = false }; struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; - unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; bool fragmented = false; int err; - pg_start = range->start >> PAGE_SHIFT; - pg_end = (range->start + range->len) >> PAGE_SHIFT; - f2fs_balance_fs(sbi, true); inode_lock(inode); + pg_start = range->start >> PAGE_SHIFT; + pg_end = min_t(pgoff_t, + (range->start + range->len) >> PAGE_SHIFT, + DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2595,8 +2909,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } /* writeback all dirty pages in the range */ - err = filemap_write_and_wait_range(inode->i_mapping, range->start, - range->start + range->len - 1); + err = filemap_write_and_wait_range(inode->i_mapping, + pg_start << PAGE_SHIFT, + (pg_end << PAGE_SHIFT) - 1); if (err) goto out; @@ -2605,7 +2920,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } @@ -2678,18 +2993,21 @@ do_map: set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; - while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { - struct page *page; + while (idx < map.m_lblk + map.m_len && + cnt < BLKS_PER_SEG(sbi)) { + struct folio *folio; - page = f2fs_get_lock_data_page(inode, idx, true); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, idx, true); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto clear_out; } - set_page_dirty(page); - set_page_private_gcing(page); - f2fs_put_page(page, 1); + f2fs_folio_wait_writeback(folio, DATA, true, true); + + folio_mark_dirty(folio); + folio_set_f2fs_gcing(folio); + f2fs_folio_put(folio, true); idx++; cnt++; @@ -2698,7 +3016,7 @@ do_map: map.m_lblk = idx; check: - if (map.m_lblk < pg_end && cnt < blk_per_seg) + if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) goto do_map; clear_inode_flag(inode, FI_SKIP_WRITES); @@ -2728,7 +3046,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) + if (!S_ISREG(inode->i_mode)) return -EINVAL; if (f2fs_readonly(sbi->sb)) @@ -2753,7 +3071,8 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) err = f2fs_defragment_range(sbi, filp, &range); mnt_drop_write_file(filp); - f2fs_update_time(sbi, REQ_TIME); + if (range.len) + f2fs_update_time(sbi, REQ_TIME); if (err < 0) return err; @@ -2804,6 +3123,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } + if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || + f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -2855,9 +3185,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, - pos_out >> F2FS_BLKSIZE_BITS, - len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), + F2FS_BYTES_TO_BLK(pos_out), + F2FS_BYTES_TO_BLK(len), false); if (!ret) { if (dst_max_i_size) @@ -2874,10 +3204,10 @@ out_src: if (ret) goto out_unlock; - src->i_mtime = inode_set_ctime_current(src); + inode_set_mtime_to_ts(src, inode_set_ctime_current(src)); f2fs_mark_inode_dirty_sync(src, false); if (src != dst) { - dst->i_mtime = inode_set_ctime_current(dst); + inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst)); f2fs_mark_inode_dirty_sync(dst, false); } f2fs_update_time(sbi, REQ_TIME); @@ -2893,32 +3223,27 @@ out: static int __f2fs_ioc_move_range(struct file *filp, struct f2fs_move_range *range) { - struct fd dst; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; - dst = fdget(range->dst_fd); - if (!dst.file) + CLASS(fd, dst)(range->dst_fd); + if (fd_empty(dst)) return -EBADF; - if (!(dst.file->f_mode & FMODE_WRITE)) { - err = -EBADF; - goto err_out; - } + if (!(fd_file(dst)->f_mode & FMODE_WRITE)) + return -EBADF; err = mnt_want_write_file(filp); if (err) - goto err_out; + return err; - err = f2fs_move_file_range(filp, range->pos_in, dst.file, + err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), range->pos_out, range->len); mnt_drop_write_file(filp); -err_out: - fdput(dst); return err; } @@ -2962,8 +3287,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", + range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); return -EINVAL; } @@ -2990,6 +3315,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) sm->last_victim[ALLOC_NEXT] = end_segno + 1; gc_control.victim_segno = start_segno; + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); if (ret == -EAGAIN) ret = 0; @@ -3093,7 +3419,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) } #endif -int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa) +int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -3117,7 +3443,7 @@ int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } int f2fs_fileattr_set(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa) + struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; @@ -3150,24 +3476,27 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - /* Use i_gc_failures for normal file as a risk signal. */ - if (inc) - f2fs_i_gc_failures_write(inode, - fi->i_gc_failures[GC_FAILURE_PIN] + 1); + if (IS_DEVICE_ALIASING(inode)) + return -EINVAL; - if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + __func__, inode->i_ino, fi->i_gc_failures); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + return 0; } static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); __u32 pin; int ret = 0; @@ -3177,22 +3506,39 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; + if (!pin && IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); goto done; + } else if (f2fs_is_pinned_file(inode)) { + goto done; + } + + if (F2FS_HAS_BLOCKS(inode)) { + ret = -EFBIG; + goto out; } - if (f2fs_should_update_outplace(inode, NULL)) { + /* Let's allow file pinning on zoned device. */ + if (!f2fs_sb_has_blkzoned(sbi) && + f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } @@ -3212,9 +3558,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) } set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + ret = F2FS_I(inode)->i_gc_failures; done: - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -3227,10 +3573,33 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0; if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + pin = F2FS_I(inode)->i_gc_failures; return put_user(pin, (u32 __user *)arg); } +static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) +{ + return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0, + (u32 __user *)arg); +} + +static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 level; + + if (get_user(level, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) + return -EINVAL; + + inode_lock(inode); + F2FS_I(inode)->ioprio_hint = level; + inode_unlock(inode); + return 0; +} + int f2fs_precache_extents(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -3243,11 +3612,12 @@ int f2fs_precache_extents(struct inode *inode) return -EOPNOTSUPP; map.m_lblk = 0; + map.m_pblk = 0; map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; - end = max_file_blocks(inode); + end = F2FS_BLK_ALIGN(i_size_read(inode)); while (map.m_lblk < end) { map.m_len = end - map.m_lblk; @@ -3255,7 +3625,7 @@ int f2fs_precache_extents(struct inode *inode) f2fs_down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - if (err) + if (err || !map.m_len) return err; map.m_lblk = m_next_extent; @@ -3416,16 +3786,14 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) int i; for (i = 0; i < count; i++) { - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { @@ -3447,8 +3815,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (blkaddr != NEW_ADDR) continue; - dn->data_blkaddr = NULL_ADDR; - f2fs_set_data_blkaddr(dn); + f2fs_set_data_blkaddr(dn, NULL_ADDR); } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); @@ -3466,6 +3833,7 @@ next: static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int released_blocks = 0; @@ -3475,9 +3843,6 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3496,7 +3861,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3505,7 +3871,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + if (!atomic_read(&fi->i_compr_blocks)) { ret = -EPERM; goto out; } @@ -3514,7 +3880,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3523,9 +3889,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3535,14 +3904,16 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = release_compress_blocks(&dn, count); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3551,8 +3922,10 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); out: + if (released_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); mnt_drop_write_file(filp); @@ -3560,85 +3933,103 @@ out: if (ret >= 0) { ret = put_user(released_blocks, (u64 __user *)arg); } else if (released_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " "iblocks=%llu, released=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, released_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; for (i = 0; i < count; i++) { - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { int compr_blocks = 0; - blkcnt_t reserved; + blkcnt_t reserved = 0; + blkcnt_t to_reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_folio, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } + continue; } + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR) { + reserved++; + continue; + } if (__is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } + } - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + to_reserved = cluster_size - compr_blocks - reserved; + + /* for the case all blocks in cluster were reserved */ + if (reserved && to_reserved == 1) { + dn->ofs_in_node += cluster_size; + goto next; } - reserved = cluster_size - compr_blocks; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + ret = inc_valid_block_count(sbi, dn->inode, + &to_reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += to_reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int reserved_blocks = 0; @@ -3647,9 +4038,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3657,19 +4045,20 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_balance_fs(sbi, true); inode_lock(inode); - if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (atomic_read(&fi->i_compr_blocks)) + goto unlock_inode; + + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3678,9 +4067,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3690,45 +4082,47 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); } unlock_inode: + if (reserved_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); -out: mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " "iblocks=%llu, reserved=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, reserved_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -3791,7 +4185,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) return -EOPNOTSUPP; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || @@ -3850,7 +4246,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) goto out; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(dn.node_folio, inode); count = min(end_offset - dn.ofs_in_node, pg_end - index); for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { struct block_device *cur_bdev; @@ -3863,8 +4259,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } @@ -3913,12 +4307,13 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (len) ret = f2fs_secure_erase(prev_bdev, inode, prev_index, prev_block, len, range.flags); + f2fs_update_time(sbi, REQ_TIME); out: filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -3953,6 +4348,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_comp_option option; int ret = 0; @@ -3967,15 +4363,22 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); + f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; @@ -3986,17 +4389,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) goto out; } - F2FS_I(inode)->i_compress_algorithm = option.algorithm; - F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; - F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); + fi->i_compress_algorithm = option.algorithm; + fi->i_log_cluster_size = option.log_cluster_size; + fi->i_cluster_size = BIT(option.log_cluster_size); + /* Set default level */ + if (fi->i_compress_algorithm == COMPRESS_ZSTD) + fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + else + fi->i_compress_level = 0; + /* Adjust mount option level */ + if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && + F2FS_OPTION(sbi).compress_level) + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) f2fs_warn(sbi, "compression algorithm is successfully set, " "but current kernel doesn't support this algorithm."); out: + f2fs_up_write(&fi->i_sem); inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4005,31 +4418,36 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; pgoff_t redirty_idx = page_idx; - int i, page_len = 0, ret = 0; + int page_len = 0, ret = 0; page_cache_ra_unbounded(&ractl, len, 0); - for (i = 0; i < len; i++, page_idx++) { - page = read_cache_page(mapping, page_idx, NULL, NULL); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + do { + folio = read_cache_folio(mapping, page_idx, NULL, NULL); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; } - page_len++; - } + page_len += folio_nr_pages(folio) - (page_idx - folio->index); + page_idx = folio_next_index(folio); + } while (page_len < len); - for (i = 0; i < page_len; i++, redirty_idx++) { - page = find_lock_page(mapping, redirty_idx); + do { + folio = filemap_lock_folio(mapping, redirty_idx); - /* It will never fail, when page has pinned above */ - f2fs_bug_on(F2FS_I_SB(inode), !page); + /* It will never fail, when folio has pinned above */ + f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); - set_page_dirty(page); - f2fs_put_page(page, 1); - f2fs_put_page(page, 0); - } + f2fs_folio_wait_writeback(folio, DATA, true, true); + + folio_mark_dirty(folio); + folio_set_f2fs_gcing(folio); + redirty_idx = folio_next_index(folio); + folio_unlock(folio); + folio_put_refs(folio, 2); + } while (redirty_idx < page_idx); return ret; } @@ -4039,10 +4457,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4051,12 +4467,11 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4064,7 +4479,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4077,23 +4493,29 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count) { - int len = min(cluster_size, count); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; - ret = redirty_blocks(inode, page_idx, len); + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; } - count -= len; - page_idx += len; + cond_resched(); + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } } if (!ret) @@ -4103,9 +4525,10 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4114,10 +4537,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4126,12 +4548,11 @@ static int f2fs_ioc_compress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4139,7 +4560,8 @@ static int f2fs_ioc_compress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4151,23 +4573,29 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count) { - int len = min(cluster_size, count); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; - ret = redirty_blocks(inode, page_idx, len); + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; } - count -= len; - page_idx += len; + cond_resched(); + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } } if (!ret) @@ -4179,9 +4607,10 @@ static int f2fs_ioc_compress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4272,6 +4701,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_decompress_file(filp); case F2FS_IOC_COMPRESS_FILE: return f2fs_ioc_compress_file(filp); + case F2FS_IOC_GET_DEV_ALIAS_FILE: + return f2fs_ioc_get_dev_alias_file(filp, arg); + case F2FS_IOC_IO_PRIO: + return f2fs_ioc_io_prio(filp, arg); default: return -ENOTTY; } @@ -4362,6 +4795,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_down_read(&fi->i_gc_rwsem[READ]); } + /* dio is not compatible w/ atomic file */ + if (f2fs_is_atomic_file(inode)) { + f2fs_up_read(&fi->i_gc_rwsem[READ]); + ret = -EOPNOTSUPP; + goto out; + } + /* * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of * the higher-level function iomap_dio_rw() in order to ensure that the @@ -4413,6 +4853,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); const loff_t pos = iocb->ki_pos; ssize_t ret; + bool dio; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4421,7 +4862,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); - if (f2fs_should_use_dio(inode, iocb, to)) { + dio = f2fs_should_use_dio(inode, iocb, to); + + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode)) && + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) && + (!f2fs_is_pinned_file(inode) || !dio)) + inode_dio_wait(inode); + + if (dio) { ret = f2fs_dio_read_iter(iocb, to); } else { ret = filemap_read(iocb, to, 0); @@ -4429,8 +4878,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); } - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -4453,8 +4901,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_READ_IO, ret); - if (trace_f2fs_dataread_end_enabled()) - trace_f2fs_dataread_end(inode, pos, ret); + trace_f2fs_dataread_end(inode, pos, ret); return ret; } @@ -4478,6 +4925,9 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + f2fs_zero_post_eof_page(inode, + iocb->ki_pos + iov_iter_count(from), true); return count; } @@ -4533,10 +4983,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, if (map.m_len > map.m_lblk) map.m_len -= map.m_lblk; else - map.m_len = 0; - map.m_may_create = true; + return 0; + + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; if (dio) { - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, + inode->i_write_hint); flag = F2FS_GET_BLOCK_PRE_DIO; } else { map.m_seg_type = NO_CHECK_TYPE; @@ -4579,12 +5032,26 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, dec_page_count(sbi, F2FS_DIO_WRITE); if (error) return error; + f2fs_update_time(sbi, REQ_TIME); f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); return 0; } +static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, + struct bio *bio, loff_t file_offset) +{ + struct inode *inode = iter->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); + enum temp_type temp = f2fs_get_segment_temp(sbi, type); + + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); + submit_bio(bio); +} + static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { - .end_io = f2fs_dio_write_end_io, + .end_io = f2fs_dio_write_end_io, + .submit_io = f2fs_dio_write_submit_io, }; static void f2fs_flush_buffered_write(struct address_space *mapping, @@ -4721,6 +5188,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) bool dio; bool may_need_sync = true; int preallocated; + const loff_t pos = iocb->ki_pos; + const ssize_t count = iov_iter_count(from); ssize_t ret; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { @@ -4742,6 +5211,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } + if (f2fs_is_pinned_file(inode) && + !f2fs_overwrite_io(inode, pos, count)) { + ret = -EIO; + goto out_unlock; + } + ret = f2fs_write_checks(iocb, from); if (ret <= 0) goto out_unlock; @@ -4749,6 +5224,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Determine whether we will do a direct write or a buffered write. */ dio = f2fs_should_use_dio(inode, iocb, from); + /* dio is not compatible w/ atomic write */ + if (dio && f2fs_is_atomic_file(inode)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); preallocated = f2fs_preallocate_blocks(iocb, from, dio); @@ -4764,8 +5245,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) f2fs_dio_write_iter(iocb, from, &may_need_sync) : f2fs_buffered_write_iter(iocb, from); - if (trace_f2fs_datawrite_end_enabled()) - trace_f2fs_datawrite_end(inode, orig_pos, ret); + trace_f2fs_datawrite_end(inode, orig_pos, ret); } /* Don't leave any preallocated blocks around past i_size. */ @@ -4808,6 +5288,8 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, struct inode *inode = file_inode(filp); int err; + trace_f2fs_fadvise(inode, offset, len, advice); + if (advice == POSIX_FADV_SEQUENTIAL) { if (S_ISFIFO(inode->i_mode)) return -ESPIPE; @@ -4823,14 +5305,21 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, filp->f_mode &= ~FMODE_RANDOM; spin_unlock(&filp->f_lock); return 0; + } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { + /* Load extent cache at the first readahead. */ + f2fs_precache_extents(inode); } err = generic_fadvise(filp, offset, len, advice); - if (!err && advice == POSIX_FADV_DONTNEED && - test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && - f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + if (err) + return err; + if (advice == POSIX_FADV_DONTNEED && + (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && + f2fs_compressed_file(inode))) + f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + else if (advice == POSIX_FADV_NOREUSE) + err = f2fs_keep_noreuse_range(inode, offset, len); return err; } @@ -4939,6 +5428,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_COMPRESS_OPTION: case F2FS_IOC_DECOMPRESS_FILE: case F2FS_IOC_COMPRESS_FILE: + case F2FS_IOC_GET_DEV_ALIAS_FILE: + case F2FS_IOC_IO_PRIO: break; default: return -ENOIOCTLCMD; @@ -4954,7 +5445,7 @@ const struct file_operations f2fs_file_operations = { .iopoll = iocb_bio_iopoll, .open = f2fs_file_open, .release = f2fs_release_file, - .mmap = f2fs_file_mmap, + .mmap_prepare = f2fs_file_mmap_prepare, .flush = f2fs_file_flush, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, @@ -4965,4 +5456,5 @@ const struct file_operations f2fs_file_operations = { .splice_read = f2fs_file_splice_read, .splice_write = iter_file_splice_write, .fadvise = f2fs_file_fadvise, + .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a1ca394bc327..384fa7e2085b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -38,28 +38,33 @@ static int gc_thread_func(void *data) struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, .should_migrate_blocks = false, - .err_gc_skipped = false }; + .err_gc_skipped = false, + .one_time = false }; wait_ms = gc_th->min_sleep_time; set_freezable(); do { - bool sync_mode, foreground = false; + bool sync_mode, foreground = false, gc_boost = false; - wait_event_interruptible_timeout(*wq, - kthread_should_stop() || freezing(current) || + wait_event_freezable_timeout(*wq, + kthread_should_stop() || waitqueue_active(fggc_wq) || gc_th->gc_wake, msecs_to_jiffies(wait_ms)); - if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) + if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) { foreground = true; + gc_control.one_time = false; + } else if (f2fs_sb_has_blkzoned(sbi)) { + gc_control.one_time = true; + } /* give it a try one time */ if (gc_th->gc_wake) gc_th->gc_wake = false; - if (try_to_freeze() || f2fs_readonly(sbi->sb)) { + if (f2fs_readonly(sbi->sb)) { stat_other_skip_bggc_count(sbi); continue; } @@ -116,18 +121,33 @@ static int gc_thread_func(void *data) goto next; } - if (has_enough_invalid_blocks(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { + if (has_enough_free_blocks(sbi, + gc_th->no_zoned_gc_percent)) { + wait_ms = gc_th->no_gc_sleep_time; + f2fs_up_write(&sbi->gc_lock); + goto next; + } + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->max_sleep_time; + } + + if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); - else + if (f2fs_sb_has_blkzoned(sbi)) + gc_boost = true; + } else { increase_sleep_time(gc_th, &wait_ms); + } do_gc: - if (!foreground) - stat_inc_bggc_count(sbi->stat_info); + stat_inc_gc_call_count(sbi, foreground ? + FOREGROUND : BACKGROUND); - sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || + (gc_boost && gc_th->boost_gc_greedy); /* foreground GC was been triggered via f2fs_balance_fs() */ - if (foreground) + if (foreground && !f2fs_sb_has_blkzoned(sbi)) sync_mode = false; gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC; @@ -179,9 +199,23 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE; + gc_th->boost_gc_greedy = GC_GREEDY; + + if (f2fs_sb_has_blkzoned(sbi)) { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC; + gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC; + } else { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->no_zoned_gc_percent = 0; + gc_th->boost_zoned_gc_percent = 0; + } gc_th->gc_wake = false; @@ -228,6 +262,8 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) switch (sbi->gc_mode) { case GC_IDLE_CB: + case GC_URGENT_LOW: + case GC_URGENT_MID: gc_mode = GC_CB; break; case GC_IDLE_GREEDY: @@ -247,19 +283,14 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode == SSR) { - p->gc_mode = GC_GREEDY; - p->dirty_bitmap = dirty_i->dirty_segmap[type]; - p->max_search = dirty_i->nr_dirty[type]; - p->ofs_unit = 1; - } else if (p->alloc_mode == AT_SSR) { + if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) { p->gc_mode = GC_GREEDY; p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi, gc_type); - p->ofs_unit = sbi->segs_per_sec; + p->ofs_unit = SEGS_PER_SEC(sbi); if (__is_large_section(sbi)) { p->dirty_bitmap = dirty_i->dirty_secmap; p->max_search = count_bits(p->dirty_bitmap, @@ -280,11 +311,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first in no_heap mode*/ + /* let's select beginning hot/small space first. */ if (f2fs_need_rand_seg(sbi)) - p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); - else if (test_opt(sbi, NOHEAP) && - (type == CURSEG_HOT_DATA || IS_NODESEG(type))) + p->offset = get_random_u32_below(MAIN_SECS(sbi) * + SEGS_PER_SEC(sbi)); + else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; @@ -295,13 +326,13 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, { /* SSR allocates in a segment unit */ if (p->alloc_mode == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (p->alloc_mode == AT_SSR) return UINT_MAX; /* LFS */ if (p->gc_mode == GC_GREEDY) - return 2 * sbi->blocks_per_seg * p->ofs_unit; + return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit); else if (p->gc_mode == GC_CB) return UINT_MAX; else if (p->gc_mode == GC_AT) @@ -332,23 +363,18 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; unsigned char u; - unsigned int i; - unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); - for (i = 0; i < usable_segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); vblocks = get_valid_blocks(sbi, segno, true); - - mtime = div_u64(mtime, usable_segs_per_sec); vblocks = div_u64(vblocks, usable_segs_per_sec); - u = (vblocks * 100) >> sbi->log_blocks_per_seg; + u = BLKS_TO_SEGS(sbi, vblocks * 100); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -363,11 +389,17 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) } static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) + unsigned int segno, struct victim_sel_policy *p, + unsigned int valid_thresh_ratio) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (p->one_time_gc && (valid_thresh_ratio < 100) && + (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100)) + return UINT_MAX; + /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) return get_valid_blocks(sbi, segno, true); @@ -485,10 +517,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, struct victim_sel_policy *p, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; - unsigned int i; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (p->gc_mode == GC_AT && @@ -496,9 +525,8 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, return; } - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -599,7 +627,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, unsigned long long age; unsigned long long max_mtime = sit_i->dirty_max_mtime; unsigned long long min_mtime = sit_i->dirty_min_mtime; - unsigned int seg_blocks = sbi->blocks_per_seg; unsigned int vblocks; unsigned int dirty_threshold = max(am->max_candidate_count, am->candidate_ratio * @@ -629,7 +656,7 @@ next_node: f2fs_bug_on(sbi, !vblocks); /* rare case */ - if (vblocks == seg_blocks) + if (vblocks == BLKS_PER_SEG(sbi)) goto skip_node; iter++; @@ -743,23 +770,29 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age) + unsigned long long age, bool one_time) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); - struct victim_sel_policy p; + struct victim_sel_policy p = {0}; unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched; + unsigned int valid_thresh_ratio = 100; bool is_atgc; int ret = 0; mutex_lock(&dirty_i->seglist_lock); - last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; + last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; + if (one_time) { + p.one_time_gc = one_time; + if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG)) + valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio; + } retry: select_policy(sbi, gc_type, type, &p); @@ -779,11 +812,14 @@ retry: goto out; } - if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) { ret = -EBUSY; - else - p.min_segno = *result; - goto out; + goto out; + } + if (gc_type == FG_GC) + clear_bit(GET_SEC_FROM_SEG(sbi, *result), dirty_i->victim_secmap); + p.min_segno = *result; + goto got_result; } ret = -ENODATA; @@ -882,7 +918,7 @@ retry: goto next; } - cost = get_gc_cost(sbi, segno, &p); + cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio); if (p.min_cost > cost) { p.min_segno = segno; @@ -896,7 +932,7 @@ next: else sm->last_victim[p.gc_mode] = segno + p.ofs_unit; sm->last_victim[p.gc_mode] %= - (MAIN_SECS(sbi) * sbi->segs_per_sec); + (MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); break; } } @@ -1015,7 +1051,7 @@ next_step: for (off = 0; off < usable_blks_in_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); - struct page *node_page; + struct folio *node_folio; struct node_info ni; int err; @@ -1038,27 +1074,27 @@ next_step: } /* phase == 2 */ - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) + node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR); + if (IS_ERR(node_folio)) continue; - /* block may become invalid during f2fs_get_node_page */ + /* block may become invalid during f2fs_get_node_folio */ if (check_valid_map(sbi, segno, off) == 0) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } if (f2fs_get_node_info(sbi, nid, &ni, false)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } if (ni.blk_addr != start_addr + off) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } - err = f2fs_move_node_page(node_page, gc_type); + err = f2fs_move_node_folio(node_folio, gc_type); if (!err && gc_type == FG_GC) submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); @@ -1104,7 +1140,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct node_info *dni, block_t blkaddr, unsigned int *nofs) { - struct page *node_page; + struct folio *node_folio; nid_t nid; unsigned int ofs_in_node, max_addrs, base; block_t source_blkaddr; @@ -1112,12 +1148,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, nid = le32_to_cpu(sum->nid); ofs_in_node = le16_to_cpu(sum->ofs_in_node); - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) + node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR); + if (IS_ERR(node_folio)) return false; if (f2fs_get_node_info(sbi, nid, dni, false)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } @@ -1128,12 +1164,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (f2fs_check_nid_range(sbi, dni->ino)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } - if (IS_INODE(node_page)) { - base = offset_in_addr(F2FS_INODE(node_page)); + if (IS_INODE(node_folio)) { + base = offset_in_addr(F2FS_INODE(node_folio)); max_addrs = DEF_ADDRS_PER_INODE; } else { base = 0; @@ -1143,13 +1179,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (base + ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u", base, ofs_in_node, max_addrs, dni->ino, dni->nid); - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } - *nofs = ofs_of_node(node_page); - source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); - f2fs_put_page(node_page, 1); + *nofs = ofs_of_node(node_folio); + source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node); + f2fs_folio_put(node_folio, true); if (source_blkaddr != blkaddr) { #ifdef CONFIG_F2FS_CHECK_FS @@ -1172,9 +1208,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; - struct page *page; + struct folio *folio, *efolio; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1184,21 +1221,19 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; int err; - page = f2fs_grab_cache_page(mapping, index, true); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(mapping, index, true); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - goto put_page; + goto put_folio; } goto got_it; } @@ -1206,54 +1241,54 @@ static int ra_data_block(struct inode *inode, pgoff_t index) set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) - goto put_page; + goto put_folio; f2fs_put_dnode(&dn); if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { err = -ENOENT; - goto put_page; + goto put_folio; } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - goto put_page; + goto put_folio; } got_it: - /* read page */ - fio.page = page; + /* read folio */ + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; /* * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), - dn.data_blkaddr, + efolio = f2fs_filemap_get_folio(META_MAPPING(sbi), dn.data_blkaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); - if (!fio.encrypted_page) { - err = -ENOMEM; - goto put_page; + if (IS_ERR(efolio)) { + err = PTR_ERR(efolio); + goto put_folio; } + fio.encrypted_page = &efolio->page; + err = f2fs_submit_page_bio(&fio); if (err) goto put_encrypted_page; - f2fs_put_page(fio.encrypted_page, 0); - f2fs_put_page(page, 1); + f2fs_put_page(fio.encrypted_page, false); + f2fs_folio_put(folio, true); f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); return 0; put_encrypted_page: - f2fs_put_page(fio.encrypted_page, 1); -put_page: - f2fs_put_page(page, 1); + f2fs_put_page(fio.encrypted_page, true); +put_folio: + f2fs_folio_put(folio, true); return err; } @@ -1264,6 +1299,8 @@ put_page: static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1273,12 +1310,11 @@ static int move_data_block(struct inode *inode, block_t bidx, .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct page *page, *mpage; + struct folio *folio, *mfolio, *efolio; block_t newaddr; int err = 0; bool lfs_mode = f2fs_lfs_mode(fio.sbi); @@ -1287,9 +1323,9 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(mapping, bidx, false); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { err = -ENOENT; @@ -1306,7 +1342,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto out; if (unlikely(dn.data_blkaddr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); err = -ENOENT; goto put_out; } @@ -1315,7 +1351,7 @@ static int move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -1324,26 +1360,26 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_out; /* read page */ - fio.page = page; + fio.folio = folio; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) f2fs_down_write(&fio.sbi->io_order_lock); - mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + mfolio = f2fs_grab_cache_folio(META_MAPPING(fio.sbi), fio.old_blkaddr, false); - if (!mpage) { - err = -ENOMEM; + if (IS_ERR(mfolio)) { + err = PTR_ERR(mfolio); goto up_out; } - fio.encrypted_page = mpage; + fio.encrypted_page = folio_file_page(mfolio, fio.old_blkaddr); - /* read source block in mpage */ - if (!PageUptodate(mpage)) { + /* read source block in mfolio */ + if (!folio_test_uptodate(mfolio)) { err = f2fs_submit_page_bio(&fio); if (err) { - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); goto up_out; } @@ -1352,11 +1388,11 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_iostat(fio.sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); - lock_page(mpage); - if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || - !PageUptodate(mpage))) { + folio_lock(mfolio); + if (unlikely(!is_meta_folio(mfolio) || + !folio_test_uptodate(mfolio))) { err = -EIO; - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); goto up_out; } } @@ -1364,25 +1400,31 @@ static int move_data_block(struct inode *inode, block_t bidx, set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* allocate block address */ - f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, + err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, type, NULL); + if (err) { + f2fs_folio_put(mfolio, true); + /* filesystem should shutdown, no need to recovery block */ + goto up_out; + } - fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), - newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); - if (!fio.encrypted_page) { - err = -ENOMEM; - f2fs_put_page(mpage, 1); + efolio = f2fs_filemap_get_folio(META_MAPPING(fio.sbi), newaddr, + FGP_LOCK | FGP_CREAT, GFP_NOFS); + if (IS_ERR(efolio)) { + err = PTR_ERR(efolio); + f2fs_folio_put(mfolio, true); goto recover_block; } + fio.encrypted_page = &efolio->page; + /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); memcpy(page_address(fio.encrypted_page), - page_address(mpage), PAGE_SIZE); - f2fs_put_page(mpage, 1); - invalidate_mapping_pages(META_MAPPING(fio.sbi), - fio.old_blkaddr, fio.old_blkaddr); - f2fs_invalidate_compress_page(fio.sbi, fio.old_blkaddr); + folio_address(mfolio), PAGE_SIZE); + f2fs_folio_put(mfolio, true); + + f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr, 1); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) @@ -1394,21 +1436,13 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); - if (fio.retry) { - err = -EAGAIN; - if (PageWriteback(fio.encrypted_page)) - end_page_writeback(fio.encrypted_page); - goto put_page_out; - } f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); -put_page_out: - f2fs_put_page(fio.encrypted_page, 1); + + f2fs_put_page(fio.encrypted_page, true); recover_block: if (err) f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, @@ -1419,19 +1453,19 @@ up_out: put_out: f2fs_put_dnode(&dn); out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } static int move_data_page(struct inode *inode, block_t bidx, int gc_type, - unsigned int segno, int off) + unsigned int segno, int off) { - struct page *page; + struct folio *folio; int err = 0; - page = f2fs_get_lock_data_page(inode, bidx, true); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, bidx, true); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { err = -ENOENT; @@ -1443,12 +1477,12 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; if (gc_type == BG_GC) { - if (PageWriteback(page)) { + if (folio_test_writeback(folio)) { err = -EAGAIN; goto out; } - set_page_dirty(page); - set_page_private_gcing(page); + folio_mark_dirty(folio); + folio_set_f2fs_gcing(folio); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1458,37 +1492,37 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, - .page = page, + .folio = folio, .encrypted_page = NULL, .need_lock = LOCK_REQ, .io_type = FS_GC_DATA_IO, }; - bool is_dirty = PageDirty(page); + bool is_dirty = folio_test_dirty(folio); retry: - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - set_page_dirty(page); - if (clear_page_dirty_for_io(page)) { + folio_mark_dirty(folio); + if (folio_clear_dirty_for_io(folio)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); } - set_page_private_gcing(page); + folio_set_f2fs_gcing(folio); err = f2fs_do_write_data_page(&fio); if (err) { - clear_page_private_gcing(page); + folio_clear_f2fs_gcing(folio); if (err == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto retry; } if (is_dirty) - set_page_dirty(page); + folio_mark_dirty(folio); } } out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -1517,7 +1551,6 @@ next_step: entry = sum; for (off = 0; off < usable_blks_in_seg; off++, entry++) { - struct page *data_page; struct inode *inode; struct node_info dni; /* dnode info for the data */ unsigned int ofs_in_node, nofs; @@ -1560,13 +1593,29 @@ next_step: ofs_in_node = le16_to_cpu(entry->ofs_in_node); if (phase == 3) { + struct folio *data_folio; int err; inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode) || - special_file(inode->i_mode)) + if (IS_ERR(inode)) continue; + if (is_bad_inode(inode) || + special_file(inode->i_mode)) { + iput(inode); + continue; + } + + if (f2fs_has_inline_data(inode)) { + iput(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "inode %lx has both inline_data flag and " + "data block, nid=%u, ofs_in_node=%u", + inode->i_ino, dni.nid, ofs_in_node); + continue; + } + err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { iput(inode); @@ -1583,7 +1632,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) { + if (f2fs_meta_inode_gc_required(inode)) { int err = ra_data_block(inode, start_bidx); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1595,15 +1644,15 @@ next_step: continue; } - data_page = f2fs_get_read_data_page(inode, start_bidx, + data_folio = f2fs_get_read_data_folio(inode, start_bidx, REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (IS_ERR(data_page)) { + if (IS_ERR(data_folio)) { iput(inode); continue; } - f2fs_put_page(data_page, 0); + f2fs_folio_put(data_folio, false); add_gc_inode(gc_list, inode); continue; } @@ -1634,7 +1683,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) + if (f2fs_meta_inode_gc_required(inode)) err = move_data_block(inode, start_bidx, gc_type, segno, off); else @@ -1642,7 +1691,7 @@ next_step: segno, off); if (!err && (gc_type == FG_GC || - f2fs_post_read_required(inode))) + f2fs_meta_inode_gc_required(inode))) submitted++; if (locked) { @@ -1661,13 +1710,14 @@ next_step: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type) + int gc_type, bool one_time) { struct sit_info *sit_i = SIT_I(sbi); int ret; down_write(&sit_i->sentry_lock); - ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, + LFS, 0, one_time); up_write(&sit_i->sentry_lock); return ret; } @@ -1675,120 +1725,166 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, struct gc_inode_list *gc_list, int gc_type, - bool force_migrate) + bool force_migrate, bool one_time) { - struct page *sum_page; - struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; - unsigned int end_segno = start_segno + sbi->segs_per_sec; + unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); + unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; - int submitted = 0; + unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; + int submitted = 0, sum_blk_cnt; - if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, sbi->segs_per_sec); + if (__is_large_section(sbi)) { + sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); - /* - * zone-capacity can be less than zone-size in zoned devices, - * resulting in less than expected usable segments in the zone, - * calculate the end segno in the zone which can be garbage collected - */ - if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= sbi->segs_per_sec - - f2fs_usable_segs_in_sec(sbi, segno); + /* + * zone-capacity can be less than zone-size in zoned devices, + * resulting in less than expected usable segments in the zone, + * calculate the end segno in the zone which can be garbage + * collected + */ + if (f2fs_sb_has_blkzoned(sbi)) + sec_end_segno -= SEGS_PER_SEC(sbi) - + f2fs_usable_segs_in_sec(sbi); + + if (gc_type == BG_GC || one_time) { + unsigned int window_granularity = + sbi->migration_window_granularity; + + if (f2fs_sb_has_blkzoned(sbi) && + !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent)) + window_granularity *= + sbi->gc_thread->boost_gc_multiple; + + end_segno = start_segno + window_granularity; + } + + if (end_segno > sec_end_segno) + end_segno = sec_end_segno; + } sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); + segno = rounddown(segno, SUMS_PER_BLOCK); + sum_blk_cnt = DIV_ROUND_UP(end_segno - segno, SUMS_PER_BLOCK); /* readahead multi ssa blocks those have contiguous address */ if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - end_segno - segno, META_SSA, true); + sum_blk_cnt, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { - sum_page = f2fs_get_sum_page(sbi, segno++); - if (IS_ERR(sum_page)) { - int err = PTR_ERR(sum_page); + struct folio *sum_folio = f2fs_get_sum_folio(sbi, segno); + + segno += SUMS_PER_BLOCK; + if (IS_ERR(sum_folio)) { + int err = PTR_ERR(sum_folio); - end_segno = segno - 1; - for (segno = start_segno; segno < end_segno; segno++) { - sum_page = find_get_page(META_MAPPING(sbi), + end_segno = segno - SUMS_PER_BLOCK; + segno = rounddown(start_segno, SUMS_PER_BLOCK); + while (segno < end_segno) { + sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); - f2fs_put_page(sum_page, 0); - f2fs_put_page(sum_page, 0); + folio_put_refs(sum_folio, 2); + segno += SUMS_PER_BLOCK; } return err; } - unlock_page(sum_page); + folio_unlock(sum_folio); } blk_start_plug(&plug); - for (segno = start_segno; segno < end_segno; segno++) { + segno = start_segno; + while (segno < end_segno) { + unsigned int cur_segno; /* find segment summary of victim */ - sum_page = find_get_page(META_MAPPING(sbi), + struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); - f2fs_put_page(sum_page, 0); - - if (get_valid_blocks(sbi, segno, false) == 0) - goto freed; - if (gc_type == BG_GC && __is_large_section(sbi) && - migrated >= sbi->migration_granularity) - goto skip; - if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) - goto skip; - - sum = page_address(sum_page); - if (type != GET_SUM_TYPE((&sum->footer))) { - f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", - segno, type, GET_SUM_TYPE((&sum->footer))); - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_CORRUPTED_SUMMARY); - goto skip; + unsigned int block_end_segno = rounddown(segno, SUMS_PER_BLOCK) + + SUMS_PER_BLOCK; + + if (block_end_segno > end_segno) + block_end_segno = end_segno; + + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) { + f2fs_err(sbi, "%s: segment %u is used by log", + __func__, segno); + f2fs_bug_on(sbi, 1); + goto next_block; } - /* - * this is to avoid deadlock: - * - lock_page(sum_page) - f2fs_replace_block - * - check_valid_map() - down_write(sentry_lock) - * - down_read(sentry_lock) - change_curseg() - * - lock_page(sum_page) - */ - if (type == SUM_TYPE_NODE) - submitted += gc_node_segment(sbi, sum->entries, segno, - gc_type); - else - submitted += gc_data_segment(sbi, sum->entries, gc_list, - segno, gc_type, - force_migrate); + if (!folio_test_uptodate(sum_folio) || + unlikely(f2fs_cp_error(sbi))) + goto next_block; - stat_inc_seg_count(sbi, type, gc_type); - sbi->gc_reclaimed_segs[sbi->gc_mode]++; - migrated++; + for (cur_segno = segno; cur_segno < block_end_segno; + cur_segno++) { + struct f2fs_summary_block *sum; -freed: - if (gc_type == FG_GC && - get_valid_blocks(sbi, segno, false) == 0) - seg_freed++; + if (get_valid_blocks(sbi, cur_segno, false) == 0) + goto freed; + if (gc_type == BG_GC && __is_large_section(sbi) && + migrated >= sbi->migration_granularity) + continue; - if (__is_large_section(sbi)) - sbi->next_victim_seg[gc_type] = - (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; -skip: - f2fs_put_page(sum_page, 0); + sum = SUM_BLK_PAGE_ADDR(sum_folio, cur_segno); + if (type != GET_SUM_TYPE((&sum->footer))) { + f2fs_err(sbi, "Inconsistent segment (%u) type " + "[%d, %d] in SSA and SIT", + cur_segno, type, + GET_SUM_TYPE((&sum->footer))); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_SUMMARY); + continue; + } + + /* + * this is to avoid deadlock: + * - lock_page(sum_page) - f2fs_replace_block + * - check_valid_map() - down_write(sentry_lock) + * - down_read(sentry_lock) - change_curseg() + * - lock_page(sum_page) + */ + if (type == SUM_TYPE_NODE) + submitted += gc_node_segment(sbi, sum->entries, + cur_segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, + gc_list, cur_segno, + gc_type, force_migrate); + + stat_inc_gc_seg_count(sbi, data_type, gc_type); + sbi->gc_reclaimed_segs[sbi->gc_mode]++; + migrated++; + +freed: + if (gc_type == FG_GC && + get_valid_blocks(sbi, cur_segno, false) == 0) + seg_freed++; + + if (__is_large_section(sbi)) + sbi->next_victim_seg[gc_type] = + (cur_segno + 1 < sec_end_segno) ? + cur_segno + 1 : NULL_SEGNO; + } +next_block: + folio_put_refs(sum_folio, 2); + segno = block_end_segno; } if (submitted) - f2fs_submit_merged_write(sbi, - (type == SUM_TYPE_NODE) ? NODE : DATA); + f2fs_submit_merged_write(sbi, data_type); blk_finish_plug(&plug); - stat_inc_call_count(sbi->stat_info); + if (migrated) + stat_inc_gc_sec_count(sbi, data_type, gc_type); return seg_freed; } @@ -1832,6 +1928,7 @@ gc_more: /* Let's run FG_GC, if we don't have enough space. */ if (has_not_enough_free_secs(sbi, 0, 0)) { gc_type = FG_GC; + gc_control->one_time = false; /* * For example, if there are many prefree_segments below given @@ -1839,6 +1936,7 @@ gc_more: * secure free segments which doesn't need fggc any more. */ if (prefree_segments(sbi)) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1853,7 +1951,7 @@ gc_more: goto stop; } retry: - ret = __get_victim(sbi, &segno, gc_type); + ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ if (ret == -ENODATA && gc_type == FG_GC && @@ -1865,14 +1963,21 @@ retry: } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, - gc_control->should_migrate_blocks); + gc_control->should_migrate_blocks, + gc_control->one_time); + if (seg_freed < 0) + goto stop; + total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { + if (seg_freed == f2fs_usable_segs_in_sec(sbi)) { sec_freed++; total_sec_freed++; } + if (gc_control->one_time) + goto stop; + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -1887,6 +1992,7 @@ retry: round++; if (skipped_round > MAX_SKIP_GC_COUNT && skipped_round * 2 >= round) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); goto stop; } @@ -1902,6 +2008,7 @@ retry: */ if (free_sections(sbi) <= upper_secs + NR_GC_CHECKPOINT_SECS && prefree_segments(sbi)) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1979,10 +2086,52 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) init_atgc_management(sbi); } +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections) +{ + unsigned int segno; + unsigned int gc_secs = dry_run_sections; + + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + /* + * avoid migrating empty section, as it can be allocated by + * log in parallel. + */ + if (!get_valid_blocks(sbi, segno, true)) + continue; + + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); + put_gc_inode(&gc_list); + + if (!dry_run && get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + if (dry_run && dry_run_sections && + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) + break; + + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + } + + return 0; +} + static int free_segment_range(struct f2fs_sb_info *sbi, - unsigned int secs, bool gc_only) + unsigned int secs, bool dry_run) { - unsigned int segno, next_inuse, start, end; + unsigned int next_inuse, start, end; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; int gc_mode, gc_type; int err = 0; @@ -1990,7 +2139,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, /* Force block allocation for GC */ MAIN_SECS(sbi) -= secs; - start = MAIN_SECS(sbi) * sbi->segs_per_sec; + start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); end = MAIN_SEGS(sbi) - 1; mutex_lock(&DIRTY_I(sbi)->seglist_lock); @@ -2004,31 +2153,18 @@ static int free_segment_range(struct f2fs_sb_info *sbi, mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ - for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) - f2fs_allocate_segment_for_resize(sbi, type, start, end); - - /* do GC to move out valid blocks in the range */ - for (segno = start; segno <= end; segno += sbi->segs_per_sec) { - struct gc_inode_list gc_list = { - .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), - }; - - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); - put_gc_inode(&gc_list); - - if (!gc_only && get_valid_blocks(sbi, segno, true)) { - err = -EAGAIN; - goto out; - } - if (fatal_signal_pending(current)) { - err = -ERESTARTSYS; + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) { + err = f2fs_allocate_segment_for_resize(sbi, type, start, end); + if (err) goto out; - } } - if (gc_only) + + /* do GC to move out valid blocks in the range */ + err = f2fs_gc_range(sbi, start, end, dry_run, 0); + if (err || dry_run) goto out; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) goto out; @@ -2051,7 +2187,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) int segment_count; int segment_count_main; long long block_count; - int segs = secs * sbi->segs_per_sec; + int segs = secs * SEGS_PER_SEC(sbi); f2fs_down_write(&sbi->sb_lock); @@ -2064,7 +2200,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + - (long long)segs * sbi->blocks_per_seg); + (long long)SEGS_TO_BLKS(sbi, segs)); if (f2fs_is_multi_device(sbi)) { int last_dev = sbi->s_ndevs - 1; int dev_segs = @@ -2079,14 +2215,16 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { - int segs = secs * sbi->segs_per_sec; - long long blks = (long long)segs * sbi->blocks_per_seg; + int segs = secs * SEGS_PER_SEC(sbi); + long long blks = SEGS_TO_BLKS(sbi, segs); long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; MAIN_SECS(sbi) += secs; + if (sbi->allocate_section_hint > MAIN_SECS(sbi)) + sbi->allocate_section_hint = MAIN_SECS(sbi); FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); @@ -2094,6 +2232,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) if (f2fs_is_multi_device(sbi)) { int last_dev = sbi->s_ndevs - 1; + sbi->allocate_section_hint = FDEV(0).total_segments / + SEGS_PER_SEC(sbi); + FDEV(last_dev).total_segments = (int)FDEV(last_dev).total_segments + segs; FDEV(last_dev).end_blk = @@ -2122,7 +2263,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) int last_dev = sbi->s_ndevs - 1; __u64 last_segs = FDEV(last_dev).total_segments; - if (block_count + last_segs * sbi->blocks_per_seg <= + if (block_count + SEGS_TO_BLKS(sbi, last_segs) <= old_block_count) return -EINVAL; } @@ -2181,12 +2322,12 @@ out_drop_write: if (err) return err; - err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + err = freeze_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); if (err) return err; if (f2fs_readonly(sbi->sb)) { - err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + err = thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); if (err) return err; return -EROFS; @@ -2223,6 +2364,7 @@ out_drop_write: clear_sbi_flag(sbi, SBI_IS_RESIZEFS); set_sbi_flag(sbi, SBI_IS_DIRTY); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) { update_fs_metadata(sbi, secs); @@ -2242,6 +2384,6 @@ recover_out: out_err: f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); - thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); return err; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 28a00942802c..6c4d4567571e 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -15,17 +15,30 @@ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +/* GC sleep parameters for zoned deivces */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10 +#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000 + /* choose candidates from sections which has age of more than 7 days */ #define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7) #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 80 /* do not GC over 80% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ +#define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ +#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 +#define BOOST_GC_MULTIPLE 5 +#define ZONED_PIN_SEC_REQUIRED_COUNT 1 + #define DEF_GC_FAILED_PINNED_FILES 2048 +#define MAX_GC_FAILED_PINNED_FILES USHRT_MAX /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ @@ -50,6 +63,13 @@ struct f2fs_gc_kthread { * caller of f2fs_balance_fs() * will wait on this wait queue. */ + + /* for gc control for zoned devices */ + unsigned int no_zoned_gc_percent; + unsigned int boost_zoned_gc_percent; + unsigned int valid_thresh_ratio; + unsigned int boost_gc_multiple; + unsigned int boost_gc_greedy; }; struct gc_inode_list { @@ -96,7 +116,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi) if (f2fs_sb_has_blkzoned(sbi)) return free_segs_blk_count_zoned(sbi); - return free_segments(sbi) << sbi->log_blocks_per_seg; + return SEGS_TO_BLKS(sbi, free_segments(sbi)); } static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) @@ -104,7 +124,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) block_t free_blks, ovp_blks; free_blks = free_segs_blk_count(sbi); - ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi)); if (free_blks < ovp_blks) return 0; @@ -151,6 +171,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, *wait -= min_time; } +static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi, + unsigned int limit_perc) +{ + return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100); +} + static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { block_t user_block_count = sbi->user_block_count; @@ -166,3 +192,11 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) free_user_blocks(sbi) < limit_free_user_blocks(invalid_user_blocks)); } + +static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) +{ + if (f2fs_sb_has_blkzoned(sbi)) + return !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent); + return has_enough_invalid_blocks(sbi); +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88fc9208ffa7..e5c6a08b7e4f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; @@ -33,11 +33,29 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -bool f2fs_sanity_check_inline_data(struct inode *inode) +static bool inode_has_blocks(struct inode *inode, struct folio *ifolio) +{ + struct f2fs_inode *ri = F2FS_INODE(ifolio); + int i; + + if (F2FS_HAS_BLOCKS(inode)) + return true; + + for (i = 0; i < DEF_NIDS_PER_INODE; i++) { + if (ri->i_nid[i]) + return true; + } + return false; +} + +bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio) { if (!f2fs_has_inline_data(inode)) return false; + if (inode_has_blocks(inode, ifolio)) + return false; + if (!support_inline_data(inode)) return true; @@ -61,70 +79,70 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void f2fs_do_read_inline_data(struct page *page, struct page *ipage) +void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) return; - f2fs_bug_on(F2FS_P_SB(page), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); - zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); + folio_zero_segment(folio, MAX_INLINE_DATA(inode), folio_size(folio)); /* Copy the whole inline data block */ - memcpy_to_page(page, 0, inline_data_addr(inode, ipage), + memcpy_to_folio(folio, 0, inline_data_addr(inode, ifolio), MAX_INLINE_DATA(inode)); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); } -void f2fs_truncate_inline_inode(struct inode *inode, - struct page *ipage, u64 from) +void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, + u64 from) { void *addr; if (from >= MAX_INLINE_DATA(inode)) return; - addr = inline_data_addr(inode, ipage); + addr = inline_data_addr(inode, ifolio); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); if (from == 0) clear_inode_flag(inode, FI_DATA_EXIST); } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +int f2fs_read_inline_data(struct inode *inode, struct folio *folio) { - struct page *ipage; + struct folio *ifolio; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) { - unlock_page(page); - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) { + folio_unlock(folio); + return PTR_ERR(ifolio); } if (!f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return -EAGAIN; } - if (page->index) - zero_user_segment(page, 0, PAGE_SIZE); + if (folio->index) + folio_zero_segment(folio, 0, folio_size(folio)); else - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(folio, ifolio); - if (!PageUptodate(page)) - SetPageUptodate(page); - f2fs_put_page(ipage, 1); - unlock_page(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + f2fs_folio_put(ifolio, true); + folio_unlock(folio); return 0; } -int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) +int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), @@ -132,7 +150,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .page = page, + .folio = folio, .encrypted_page = NULL, .io_type = FS_DATA_IO, }; @@ -164,20 +182,20 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) return -EFSCORRUPTED; } - f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); + f2fs_bug_on(F2FS_F_SB(folio), folio_test_writeback(folio)); - f2fs_do_read_inline_data(page, dn->inode_page); - set_page_dirty(page); + f2fs_do_read_inline_data(folio, dn->inode_folio); + folio_mark_dirty(folio); /* clear dirty state */ - dirty = clear_page_dirty_for_io(page); + dirty = folio_clear_dirty_for_io(folio); /* write data page to try to make data consistent */ - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); f2fs_outplace_write_data(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); f2fs_remove_dirty_inode(dn->inode); @@ -187,8 +205,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0); - clear_page_private_inline(dn->inode_page); + f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0); + folio_clear_f2fs_inline(dn->inode_folio); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -200,39 +218,41 @@ int f2fs_convert_inline_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage, *page; + struct folio *ifolio, *folio; int err = 0; - if (!f2fs_has_inline_data(inode) || - f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + return -EROFS; + + if (!f2fs_has_inline_data(inode)) return 0; err = f2fs_dquot_initialize(inode); if (err) return err; - page = f2fs_grab_cache_page(inode->i_mapping, 0, false); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(inode->i_mapping, 0, false); + if (IS_ERR(folio)) + return PTR_ERR(folio); f2fs_lock_op(sbi); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (f2fs_has_inline_data(inode)) - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_folio(&dn, folio); f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); if (!err) f2fs_balance_fs(sbi, dn.node_changed); @@ -240,44 +260,42 @@ out: return err; } -int f2fs_write_inline_data(struct inode *inode, struct page *page) +int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { - struct dnode_of_data dn; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct folio *ifolio; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); - if (err) - return err; + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); if (!f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); + f2fs_folio_put(ifolio, true); return -EAGAIN; } - f2fs_bug_on(F2FS_I_SB(inode), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, dn.inode_page), - page, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(dn.inode_page); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); + memcpy_from_folio(inline_data_addr(inode, ifolio), + folio, 0, MAX_INLINE_DATA(inode)); + folio_mark_dirty(ifolio); - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(dn.inode_page); - f2fs_put_dnode(&dn); + folio_clear_f2fs_inline(ifolio); + f2fs_folio_put(ifolio, true); return 0; } -int f2fs_recover_inline_data(struct inode *inode, struct page *npage) +int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; void *src_addr, *dst_addr; - struct page *ipage; /* * The inline_data recovery policy is as follows. @@ -287,38 +305,39 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ - if (IS_INODE(npage)) - ri = F2FS_INODE(npage); + if (IS_INODE(nfolio)) + ri = F2FS_INODE(nfolio); if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { + struct folio *ifolio; process_inline: - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); - src_addr = inline_data_addr(inode, npage); - dst_addr = inline_data_addr(inode, ipage); + src_addr = inline_data_addr(inode, nfolio); + dst_addr = inline_data_addr(inode, ifolio); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, true); return 1; } if (f2fs_has_inline_data(inode)) { - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - f2fs_truncate_inline_inode(inode, ipage, 0); + struct folio *ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); + f2fs_truncate_inline_inode(inode, ifolio, 0); stat_dec_inline_inode(inode); clear_inode_flag(inode, FI_INLINE_DATA); - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { int ret; @@ -333,49 +352,50 @@ process_inline: struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio, + bool use_hash) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; - struct page *ipage; + struct folio *ifolio; void *inline_dentry; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) { - *res_page = ipage; + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) { + *res_folio = ifolio; return NULL; } - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(&d, fname, NULL); - unlock_page(ipage); + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); + folio_unlock(ifolio); if (IS_ERR(de)) { - *res_page = ERR_CAST(de); + *res_folio = ERR_CAST(de); de = NULL; } if (de) - *res_page = ipage; + *res_folio = ifolio; else - f2fs_put_page(ipage, 0); + f2fs_folio_put(ifolio, false); return de; } int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, - struct page *ipage) + struct folio *ifolio) { struct f2fs_dentry_ptr d; void *inline_dentry; - inline_dentry = inline_data_addr(inode, ipage); + inline_dentry = inline_data_addr(inode, ifolio); make_dentry_ptr_inline(inode, &d, inline_dentry); f2fs_do_make_empty_dir(inode, parent, &d); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); /* update i_size to MAX_INLINE_DATA */ if (i_size_read(inode) < MAX_INLINE_DATA(inode)) @@ -387,39 +407,39 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, * NOTE: ipage is grabbed by caller, but if any error occurs, we should * release ipage in this function. */ -static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, +static int f2fs_move_inline_dirents(struct inode *dir, struct folio *ifolio, void *inline_dentry) { - struct page *page; + struct folio *folio; struct dnode_of_data dn; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr src, dst; int err; - page = f2fs_grab_cache_page(dir->i_mapping, 0, true); - if (!page) { - f2fs_put_page(ipage, 1); - return -ENOMEM; + folio = f2fs_grab_cache_folio(dir->i_mapping, 0, true); + if (IS_ERR(folio)) { + f2fs_folio_put(ifolio, true); + return PTR_ERR(folio); } - set_new_dnode(&dn, dir, ipage, NULL, 0); + set_new_dnode(&dn, dir, ifolio, NULL, 0); err = f2fs_reserve_block(&dn, 0); if (err) goto out; if (unlikely(dn.data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(&dn); - set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); - f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); + f2fs_warn(F2FS_F_SB(folio), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dir->i_ino, dn.data_blkaddr); - f2fs_handle_error(F2FS_P_SB(page), ERROR_INVALID_BLKADDR); + f2fs_handle_error(F2FS_F_SB(folio), ERROR_INVALID_BLKADDR); err = -EFSCORRUPTED; goto out; } - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - dentry_blk = page_address(page); + dentry_blk = folio_address(folio); /* * Start by zeroing the full block, to ensure that all unused space is @@ -435,12 +455,12 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); - if (!PageUptodate(page)) - SetPageUptodate(page); - set_page_dirty(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_mark_dirty(folio); /* clear inline dir and flag after data writeback */ - f2fs_truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ifolio, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -457,7 +477,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -513,7 +533,7 @@ punch_dentry_pages: return err; } -static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, +static int f2fs_move_rehashed_dirents(struct inode *dir, struct folio *ifolio, void *inline_dentry) { void *backup_dentry; @@ -522,20 +542,20 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir), MAX_INLINE_DATA(dir), GFP_F2FS_ZERO); if (!backup_dentry) { - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return -ENOMEM; } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); - f2fs_truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ifolio, 0); - unlock_page(ipage); + folio_unlock(ifolio); err = f2fs_add_inline_entries(dir, backup_dentry); if (err) goto recover; - lock_page(ipage); + folio_lock(ifolio); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -551,31 +571,31 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, kfree(backup_dentry); return 0; recover: - lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + folio_lock(ifolio); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, true); kfree(backup_dentry); return err; } -static int do_convert_inline_dir(struct inode *dir, struct page *ipage, +static int do_convert_inline_dir(struct inode *dir, struct folio *ifolio, void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) - return f2fs_move_inline_dirents(dir, ipage, inline_dentry); + return f2fs_move_inline_dirents(dir, ifolio, inline_dentry); else - return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); + return f2fs_move_rehashed_dirents(dir, ifolio, inline_dentry); } int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; struct f2fs_filename fname; void *inline_dentry = NULL; int err = 0; @@ -589,22 +609,22 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) if (err) goto out; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto out_fname; } - if (f2fs_has_enough_room(dir, ipage, &fname)) { - f2fs_put_page(ipage, 1); + if (f2fs_has_enough_room(dir, ifolio, &fname)) { + f2fs_folio_put(ifolio, true); goto out_fname; } - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); - err = do_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ifolio, inline_dentry); if (!err) - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); out_fname: f2fs_free_filename(&fname); out: @@ -616,24 +636,24 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; unsigned int bit_pos; void *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(fname->disk_name.len); - struct page *page = NULL; + struct folio *folio = NULL; int err = 0; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { - err = do_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ifolio, inline_dentry); if (err) return err; err = -EAGAIN; @@ -641,20 +661,21 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, } if (inode) { - f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, fname, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); + f2fs_down_write_nested(&F2FS_I(inode)->i_sem, + SINGLE_DEPTH_NESTING); + folio = f2fs_init_inode_metadata(inode, dir, fname, ifolio); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } } - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, bit_pos); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); /* we don't need to mark_inode_dirty now */ if (inode) { @@ -662,9 +683,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) - f2fs_update_inode(inode, page); + f2fs_update_inode(inode, folio); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } f2fs_update_parent_metadata(dir, inode, 0); @@ -672,12 +693,12 @@ fail: if (inode) f2fs_up_write(&F2FS_I(inode)->i_sem); out: - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return err; } -void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *dir, struct inode *inode) +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, + struct folio *folio, struct inode *dir, struct inode *inode) { struct f2fs_dentry_ptr d; void *inline_dentry; @@ -685,20 +706,20 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, unsigned int bit_pos; int i; - lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, NODE, true, true); - inline_dentry = inline_data_addr(dir, page); + inline_dentry = inline_data_addr(dir, folio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = dentry - d.dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, d.bitmap); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); if (inode) @@ -708,21 +729,21 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_inline_dir(struct inode *dir) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; unsigned int bit_pos = 2; void *inline_dentry; struct f2fs_dentry_ptr d; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) return false; - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos); - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); if (bit_pos < d.max) return false; @@ -734,7 +755,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr) { struct inode *inode = file_inode(file); - struct page *ipage = NULL; + struct folio *ifolio = NULL; struct f2fs_dentry_ptr d; void *inline_dentry = NULL; int err; @@ -744,17 +765,17 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (ctx->pos == d.max) return 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); /* * f2fs_readdir was protected by inode.i_rwsem, it is safe to access * ipage without page's lock held. */ - unlock_page(ipage); + folio_unlock(ifolio); - inline_dentry = inline_data_addr(inode, ipage); + inline_dentry = inline_data_addr(inode, ifolio); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -762,7 +783,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (!err) ctx->pos = d.max; - f2fs_put_page(ipage, 0); + f2fs_folio_put(ifolio, false); return err < 0 ? err : 0; } @@ -773,12 +794,12 @@ int f2fs_inline_data_fiemap(struct inode *inode, __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_LAST; struct node_info ni; - struct page *ipage; + struct folio *ifolio; int err = 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) && !f2fs_has_inline_data(inode)) { @@ -803,11 +824,11 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(inode, ipage) - - (char *)F2FS_INODE(ipage); + byteaddr += (char *)inline_data_addr(inode, ifolio) - + (char *)F2FS_INODE(ifolio); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c1c2ba9f28e5..38b8994bc1b2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/sched/mm.h> #include <linux/lz4.h> @@ -29,9 +28,17 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (is_inode_flag_set(inode, FI_NEW_INODE)) return; + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; + /* only atomic file w/ FI_ATOMIC_COMMITTED can be set vfs dirty */ + if (f2fs_is_atomic_file(inode) && + !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) + return; + mark_inode_dirty_sync(inode); } @@ -61,80 +68,63 @@ void f2fs_set_inode_flags(struct inode *inode) S_ENCRYPTED|S_VERITY|S_CASEFOLD); } -static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +static void __get_inode_rdev(struct inode *inode, struct folio *node_folio) { - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_folio); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[extra_size]) - inode->i_rdev = old_decode_dev( - le32_to_cpu(ri->i_addr[extra_size])); + if (addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(addr[0])); else - inode->i_rdev = new_decode_dev( - le32_to_cpu(ri->i_addr[extra_size + 1])); + inode->i_rdev = new_decode_dev(le32_to_cpu(addr[1])); } } -static int __written_first_block(struct f2fs_sb_info *sbi, - struct f2fs_inode *ri) +static void __set_inode_rdev(struct inode *inode, struct folio *node_folio) { - block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - - if (!__is_valid_data_blkaddr(addr)) - return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - return -EFSCORRUPTED; - } - return 0; -} - -static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) -{ - int extra_size = get_extra_isize(inode); + __le32 *addr = get_dnode_addr(inode, node_folio); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[extra_size] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 1] = 0; + addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + addr[1] = 0; } else { - ri->i_addr[extra_size] = 0; - ri->i_addr[extra_size + 1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[extra_size + 2] = 0; + addr[0] = 0; + addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + addr[2] = 0; } } } -static void __recover_inline_status(struct inode *inode, struct page *ipage) +static void __recover_inline_status(struct inode *inode, struct folio *ifolio) { - void *inline_data = inline_data_addr(inode, ipage); + void *inline_data = inline_data_addr(inode, ifolio); __le32 *start = inline_data; __le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32); while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); - set_raw_inline(inode, F2FS_INODE(ipage)); - set_page_dirty(ipage); + set_raw_inline(inode, F2FS_INODE(ifolio)); + folio_mark_dirty(ifolio); return; } } return; } -static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static +bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; if (!f2fs_sb_has_inode_chksum(sbi)) return false; - if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), @@ -144,9 +134,9 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page return true; } -static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_node *node = F2FS_NODE(folio); struct f2fs_inode *ri = &node->i; __le32 ino = node->footer.ino; __le32 gen = ri->i_generation; @@ -155,19 +145,18 @@ static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum); unsigned int cs_size = sizeof(dummy_cs); - chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino, - sizeof(ino)); - chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen)); + chksum = f2fs_chksum(sbi->s_chksum_seed, (__u8 *)&ino, sizeof(ino)); + chksum_seed = f2fs_chksum(chksum, (__u8 *)&gen, sizeof(gen)); - chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset); - chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size); + chksum = f2fs_chksum(chksum_seed, (__u8 *)ri, offset); + chksum = f2fs_chksum(chksum, (__u8 *)&dummy_cs, cs_size); offset += cs_size; - chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset, - F2FS_BLKSIZE - offset); + chksum = f2fs_chksum(chksum, (__u8 *)ri + offset, + F2FS_BLKSIZE - offset); return chksum; } -bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *ri; __u32 provided, calculated; @@ -176,32 +165,34 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) return true; #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) #else - if (!f2fs_enable_inode_chksum(sbi, page) || - PageDirty(page) || PageWriteback(page)) + if (!f2fs_enable_inode_chksum(sbi, folio) || + folio_test_dirty(folio) || + folio_test_writeback(folio)) #endif return true; - ri = &F2FS_NODE(page)->i; + ri = &F2FS_NODE(folio)->i; provided = le32_to_cpu(ri->i_inode_checksum); - calculated = f2fs_inode_chksum(sbi, page); + calculated = f2fs_inode_chksum(sbi, folio); if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + folio->index, ino_of_node(folio), + provided, calculated); return provided == calculated; } -void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio) { - struct f2fs_inode *ri = &F2FS_NODE(page)->i; + struct f2fs_inode *ri = &F2FS_NODE(folio)->i; - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, folio)) return; - ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio)); } static bool sanity_check_compress_inode(struct inode *inode, @@ -214,7 +205,7 @@ static bool sanity_check_compress_inode(struct inode *inode, f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix", __func__, inode->i_ino, ri->i_compress_algorithm); - goto err; + return false; } if (le64_to_cpu(ri->i_compr_blocks) > SECTOR_TO_BLOCK(inode->i_blocks)) { @@ -222,14 +213,14 @@ static bool sanity_check_compress_inode(struct inode *inode, "%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix", __func__, inode->i_ino, le64_to_cpu(ri->i_compr_blocks), SECTOR_TO_BLOCK(inode->i_blocks)); - goto err; + return false; } if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix", __func__, inode->i_ino, ri->i_log_cluster_size); - goto err; + return false; } clevel = le16_to_cpu(ri->i_compress_flag) >> @@ -273,37 +264,44 @@ static bool sanity_check_compress_inode(struct inode *inode, err_level: f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix", __func__, inode->i_ino, clevel); -err: - set_sbi_flag(sbi, SBI_NEED_FSCK); return false; } -static bool sanity_check_inode(struct inode *inode, struct page *node_page) +static bool sanity_check_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri = F2FS_INODE(node_page); + struct f2fs_inode *ri = F2FS_INODE(node_folio); unsigned long long iblocks; - iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks); if (!iblocks) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); return false; } - if (ino_of_node(node_page) != nid_of_node(node_page)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); + if (ino_of_node(node_folio) != nid_of_node(node_folio)) { f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + ino_of_node(node_folio), nid_of_node(node_folio)); + return false; + } + + if (ino_of_node(node_folio) == fi->i_xattr_nid) { + f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + + if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) { + f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink", + __func__, inode->i_ino); return false; } if (f2fs_has_extra_attr(inode)) { if (!f2fs_sb_has_extra_attr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", __func__, inode->i_ino); return false; @@ -311,22 +309,11 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE || fi->i_extra_isize % sizeof(__le32)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", __func__, inode->i_ino, fi->i_extra_isize, F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi) && - f2fs_has_inline_xattr(inode) && - (!fi->i_inline_xattr_size || - fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); - return false; - } if (f2fs_sb_has_compression(sbi) && fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, @@ -334,67 +321,83 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (!sanity_check_compress_inode(inode, ri)) return false; } - } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); + } + + if (f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE); return false; } if (!f2fs_sb_has_extra_attr(sbi)) { if (f2fs_sb_has_project_quota(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA); return false; } if (f2fs_sb_has_inode_chksum(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM); return false; } if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); return false; } if (f2fs_sb_has_inode_crtime(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME); return false; } if (f2fs_sb_has_compression(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_COMPRESSION); return false; } } - if (f2fs_sanity_check_inline_data(inode)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); + if (f2fs_sanity_check_inline_data(inode, node_folio)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; } if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; } if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off", __func__, inode->i_ino); return false; } + if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + + if (IS_DEVICE_ALIASING(inode)) { + if (!f2fs_sb_has_device_alias(sbi)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", + __func__, inode->i_ino); + return false; + } + if (!f2fs_is_pinned_file(inode)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", + __func__, inode->i_ino); + return false; + } + } + return true; } @@ -402,29 +405,28 @@ static void init_idisk_time(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - fi->i_disk_time[0] = inode->i_atime; + fi->i_disk_time[0] = inode_get_atime(inode); fi->i_disk_time[1] = inode_get_ctime(inode); - fi->i_disk_time[2] = inode->i_mtime; + fi->i_disk_time[2] = inode_get_mtime(inode); } static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct page *node_page; + struct folio *node_folio; struct f2fs_inode *ri; projid_t i_projid; - int err; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) return -EINVAL; - node_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); + node_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(node_folio)) + return PTR_ERR(node_folio); - ri = F2FS_INODE(node_page); + ri = F2FS_INODE(node_folio); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -433,18 +435,17 @@ static int do_read_inode(struct inode *inode) inode->i_size = le64_to_cpu(ri->i_size); inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1); - inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); + inode_set_atime(inode, le64_to_cpu(ri->i_atime), + le32_to_cpu(ri->i_atime_nsec)); inode_set_ctime(inode, le64_to_cpu(ri->i_ctime), le32_to_cpu(ri->i_ctime_nsec)); - inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + inode_set_mtime(inode, le64_to_cpu(ri->i_mtime), + le32_to_cpu(ri->i_mtime_nsec)); inode->i_generation = le32_to_cpu(ri->i_generation); if (S_ISDIR(inode->i_mode)) fi->i_current_depth = le32_to_cpu(ri->i_current_depth); else if (S_ISREG(inode->i_mode)) - fi->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(ri->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) @@ -475,29 +476,26 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } + if (!sanity_check_inode(inode, node_folio)) { + f2fs_folio_put(node_folio, true); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + return -EFSCORRUPTED; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) - __recover_inline_status(inode, node_page); + __recover_inline_status(inode, node_folio); /* try to recover cold bit for non-dir inode */ - if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { - f2fs_wait_on_page_writeback(node_page, NODE, true, true); - set_cold_node(node_page, false); - set_page_dirty(node_page); + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) { + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + set_cold_node(node_folio, false); + folio_mark_dirty(node_folio); } /* get rdev by using inline_info */ - __get_inode_rdev(inode, ri); - - if (S_ISREG(inode->i_mode)) { - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; - } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); - } + __get_inode_rdev(inode, node_folio); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; @@ -540,23 +538,17 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - /* Need all the flag bits */ - f2fs_init_read_extent_tree(inode, node_page); - f2fs_init_age_extent_tree(inode); - - if (!sanity_check_inode(inode, node_page)) { - f2fs_put_page(node_page, 1); + if (!sanity_check_extent_cache(inode, node_folio)) { + f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } - if (!sanity_check_extent_cache(inode)) { - f2fs_put_page(node_page, 1); - f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); - return -EFSCORRUPTED; - } + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_folio); + f2fs_init_age_extent_tree(inode); - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); @@ -583,7 +575,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) { + if (!(inode_state_read_once(inode) & I_NEW)) { if (is_meta_ino(sbi, ino)) { f2fs_err(sbi, "inaccessible inode: %lu, run fsck to repair", ino); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -615,7 +607,7 @@ make_now: #ifdef CONFIG_F2FS_FS_COMPRESSION inode->i_mapping->a_ops = &f2fs_compress_aops; /* - * generic_error_remove_page only truncates pages of regular + * generic_error_remove_folio only truncates pages of regular * inode */ inode->i_mode |= S_IFREG; @@ -648,14 +640,6 @@ make_now: } f2fs_set_inode_flags(inode); - if (file_should_truncate(inode) && - !is_sbi_flag_set(sbi, SBI_POR_DOING)) { - ret = f2fs_truncate(inode); - if (ret) - goto bad_inode; - file_dont_truncate(inode); - } - unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -681,20 +665,21 @@ retry: return inode; } -void f2fs_update_inode(struct inode *inode, struct page *node_page) +void f2fs_update_inode(struct inode *inode, struct folio *node_folio) { + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_tree *et = fi->extent_tree[EX_READ]; - f2fs_wait_on_page_writeback(node_page, NODE, true, true); - set_page_dirty(node_page); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + folio_mark_dirty(node_folio); f2fs_inode_synced(inode); - ri = F2FS_INODE(node_page); + ri = F2FS_INODE(node_folio); ri->i_mode = cpu_to_le16(inode->i_mode); - ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_advise = fi->i_advise; ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); @@ -713,102 +698,96 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) } set_raw_inline(inode, ri); - ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); - ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); - ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); - ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); - ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); - ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ri->i_atime = cpu_to_le64(inode_get_atime_sec(inode)); + ri->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + ri->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode)); + ri->i_atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode)); + ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); if (S_ISDIR(inode->i_mode)) - ri->i_current_depth = - cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_current_depth = cpu_to_le32(fi->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = - cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); - ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); - ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); - ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures); + ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid); + ri->i_flags = cpu_to_le32(fi->i_flags); + ri->i_pino = cpu_to_le32(fi->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - ri->i_dir_level = F2FS_I(inode)->i_dir_level; + ri->i_dir_level = fi->i_dir_level; if (f2fs_has_extra_attr(inode)) { - ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize); if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = - cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + cpu_to_le16(fi->i_inline_xattr_size); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_projid)) { + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) { projid_t i_projid; - i_projid = from_kprojid(&init_user_ns, - F2FS_I(inode)->i_projid); + i_projid = from_kprojid(&init_user_ns, fi->i_projid); ri->i_projid = cpu_to_le32(i_projid); } if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_crtime)) { - ri->i_crtime = - cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); - ri->i_crtime_nsec = - cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec); + ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec); } if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_compress_flag)) { unsigned short compress_flag; - ri->i_compr_blocks = - cpu_to_le64(atomic_read( - &F2FS_I(inode)->i_compr_blocks)); - ri->i_compress_algorithm = - F2FS_I(inode)->i_compress_algorithm; - compress_flag = F2FS_I(inode)->i_compress_flag | - F2FS_I(inode)->i_compress_level << + ri->i_compr_blocks = cpu_to_le64( + atomic_read(&fi->i_compr_blocks)); + ri->i_compress_algorithm = fi->i_compress_algorithm; + compress_flag = fi->i_compress_flag | + fi->i_compress_level << COMPRESS_LEVEL_OFFSET; ri->i_compress_flag = cpu_to_le16(compress_flag); - ri->i_log_cluster_size = - F2FS_I(inode)->i_log_cluster_size; + ri->i_log_cluster_size = fi->i_log_cluster_size; } } - __set_inode_rdev(inode, ri); + __set_inode_rdev(inode, node_folio); /* deleted inode */ if (inode->i_nlink == 0) - clear_page_private_inline(node_page); + folio_clear_f2fs_inline(node_folio); init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio); #endif } void f2fs_update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *node_page; + struct folio *node_folio; int count = 0; retry: - node_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) { - int err = PTR_ERR(node_page); + node_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(node_folio)) { + int err = PTR_ERR(node_folio); /* The node block was truncated. */ if (err == -ENOENT) return; + if (err == -EFSCORRUPTED) + goto stop_checkpoint; + if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; +stop_checkpoint: f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE); return; } - f2fs_update_inode(inode, node_page); - f2fs_put_page(node_page, 1); + f2fs_update_inode(inode, node_folio); + f2fs_folio_put(node_folio, true); } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -826,8 +805,17 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + /* + * no need to update inode page, ultimately f2fs_evict_inode() will + * clear dirty status of inode. + */ + if (f2fs_cp_error(sbi)) + return -EIO; + + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages @@ -839,6 +827,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } +void f2fs_remove_donate_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (list_empty(&F2FS_I(inode)->gdonate_list)) + return; + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + spin_unlock(&sbi->inode_lock[DONATE_INODE]); +} + /* * Called at the last iput() if i_nlink is zero */ @@ -848,11 +849,13 @@ void f2fs_evict_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t xnid = fi->i_xattr_nid; int err = 0; + bool freeze_protected = false; f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } @@ -871,8 +874,10 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); + f2fs_remove_donate_inode(inode); - f2fs_destroy_extent_tree(inode); + if (!IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; @@ -887,8 +892,10 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) { sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -926,12 +933,28 @@ retry: goto retry; } + if (IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); + if (err) { f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + /* + * If both f2fs_truncate() and f2fs_update_inode_page() failed + * due to fuzzed corrupted inode, call f2fs_inode_synced() to + * avoid triggering later f2fs_bug_on(). + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(sbi, + "f2fs_evict_inode: inode is dirty, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } } - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (freeze_protected) sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -946,8 +969,12 @@ no_delete: if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); - else - f2fs_inode_synced(inode); + + /* + * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META] + * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint. + */ + f2fs_inode_synced(inode); /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 193b22a2d6bf..043d20516a21 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct f2fs_inode_info *fi; nid_t ino; struct inode *inode; bool nid_free = false; @@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, inode_init_owner(idmap, inode, dir, mode); + fi = F2FS_I(inode); inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); - F2FS_I(inode)->i_crtime = inode->i_mtime; + simple_inode_init_ts(inode); + fi->i_crtime = inode_get_mtime(inode); inode->i_generation = get_random_u32(); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_current_depth = 1; + fi->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { @@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) - F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + fi->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + fi->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); - F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) @@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, f2fs_has_inline_dentry(inode)) { xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } - F2FS_I(inode)->i_inline_xattr_size = xattr_size; + fi->i_inline_xattr_size = xattr_size; - F2FS_I(inode)->i_flags = + fi->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; + fi->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); /* Check compression first. */ @@ -339,6 +341,7 @@ fail_drop: trace_f2fs_new_inode(inode, err); dquot_drop(inode); inode->i_flags |= S_NOQUOTA; + make_bad_inode(inode); if (nid_free) set_inode_flag(inode, FI_FREE_NID); clear_nlink(inode); @@ -411,7 +414,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(dir); @@ -444,84 +447,26 @@ out: struct dentry *f2fs_get_parent(struct dentry *child) { - struct page *page; - unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page); + struct folio *folio; + unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &folio); if (!ino) { - if (IS_ERR(page)) - return ERR_CAST(page); + if (IS_ERR(folio)) + return ERR_CAST(folio); return ERR_PTR(-ENOENT); } return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct qstr dotdot = QSTR_INIT("..", 2); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct f2fs_dir_entry *de; - struct page *page; + struct folio *folio; struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -532,17 +477,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = f2fs_prepare_lookup(dir, dentry, &fname); - generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) goto out_splice; if (err) goto out; - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); f2fs_free_filename(&fname); if (!de) { - if (IS_ERR(page)) { - err = PTR_ERR(page); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } err = -ENOENT; @@ -550,7 +494,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } ino = le32_to_cpu(de->ino); - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) { @@ -558,17 +502,14 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; + if (inode->i_nlink == 0) { + f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink", + __func__, inode->i_ino); + err = -EFSCORRUPTED; + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + goto out_iput; } - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { @@ -578,8 +519,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: -#if IS_ENABLED(CONFIG_UNICODE) - if (!inode && IS_CASEFOLDED(dir)) { + if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as * well. For now, prevent the negative dentry @@ -588,7 +528,7 @@ out_splice: trace_f2fs_lookup_end(dir, dentry, ino, err); return NULL; } -#endif + new = d_splice_alias(inode, dentry); trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry, ino, IS_ERR(new) ? PTR_ERR(new) : err); @@ -605,28 +545,38 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode = d_inode(dentry); struct f2fs_dir_entry *de; - struct page *page; + struct folio *folio; int err; trace_f2fs_unlink_enter(dir, dentry); if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; - goto fail; + goto out; } err = f2fs_dquot_initialize(dir); if (err) - goto fail; + goto out; err = f2fs_dquot_initialize(inode); if (err) - goto fail; + goto out; - de = f2fs_find_entry(dir, &dentry->d_name, &page); + de = f2fs_find_entry(dir, &dentry->d_name, &folio); if (!de) { - if (IS_ERR(page)) - err = PTR_ERR(page); - goto fail; + if (IS_ERR(folio)) + err = PTR_ERR(folio); + goto out; + } + + if (unlikely(inode->i_nlink == 0)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has zero i_nlink", + __func__, inode->i_ino); + goto corrupted; + } else if (S_ISDIR(inode->i_mode) && unlikely(inode->i_nlink == 1)) { + f2fs_warn(sbi, "%s: directory inode (ino=%lx) has a single i_nlink", + __func__, inode->i_ino); + goto corrupted; } f2fs_balance_fs(sbi, true); @@ -635,25 +585,30 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = f2fs_acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); - goto fail; + f2fs_folio_put(folio, false); + goto out; } - f2fs_delete_entry(de, page, dir, inode); + f2fs_delete_entry(de, folio, dir, inode); f2fs_unlock_op(sbi); -#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. */ - if (IS_CASEFOLDED(dir)) + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); -#endif + if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); -fail: + + goto out; +corrupted: + err = -EFSCORRUPTED; + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_folio_put(folio, false); +out: trace_f2fs_unlink_exit(inode, err); return err; } @@ -753,23 +708,23 @@ out_free_encrypted_link: return err; } -static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + return ERR_PTR(-EIO); err = f2fs_dquot_initialize(dir); if (err) - return err; + return ERR_PTR(err); inode = f2fs_new_inode(idmap, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) - return PTR_ERR(inode); + return ERR_CAST(inode); inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; @@ -791,12 +746,12 @@ static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, f2fs_sync_fs(sbi->sb, 1); f2fs_balance_fs(sbi, true); - return 0; + return NULL; out_fail: clear_inode_flag(inode, FI_INC_LINK); f2fs_handle_failed_inode(inode); - return err; + return ERR_PTR(err); } static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) @@ -853,7 +808,7 @@ out: static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode, bool is_whiteout, - struct inode **new_inode) + struct inode **new_inode, struct f2fs_filename *fname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -881,7 +836,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out; - err = f2fs_do_tmpfile(inode, dir); + err = f2fs_do_tmpfile(inode, dir, fname); if (err) goto release_out; @@ -896,7 +851,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, f2fs_i_links_write(inode, false); spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; + inode_state_set(inode, I_LINKABLE); spin_unlock(&inode->i_lock); } else { if (file) @@ -932,22 +887,24 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL); + err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL); return finish_open_simple(file, err); } static int f2fs_create_whiteout(struct mnt_idmap *idmap, - struct inode *dir, struct inode **whiteout) + struct inode *dir, struct inode **whiteout, + struct f2fs_filename *fname) { - return __f2fs_tmpfile(idmap, dir, NULL, - S_IFCHR | WHITEOUT_MODE, true, whiteout); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE, + true, whiteout, fname); } int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode) { - return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, + false, new_inode, NULL); } static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, @@ -958,11 +915,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; - struct page *old_dir_page = NULL; - struct page *old_page, *new_page = NULL; + struct folio *old_dir_folio = NULL; + struct folio *old_folio, *new_folio = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; + bool old_is_dir = S_ISDIR(old_inode->i_mode); int err; if (unlikely(f2fs_cp_error(sbi))) @@ -972,7 +930,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(old_inode)->i_projid))) return -EXDEV; /* @@ -990,7 +948,14 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(idmap, old_dir, &whiteout); + struct f2fs_filename fname; + + err = f2fs_setup_filename(old_dir, &old_dentry->d_name, + 0, &fname); + if (err) + return err; + + err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); if (err) return err; } @@ -1010,18 +975,18 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } err = -ENOENT; - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_folio); if (!old_entry) { - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); + if (IS_ERR(old_folio)) + err = PTR_ERR(old_folio); goto out; } - if (S_ISDIR(old_inode->i_mode)) { - old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); + if (old_is_dir && old_dir != new_dir) { + old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_folio); if (!old_dir_entry) { - if (IS_ERR(old_dir_page)) - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_folio)) + err = PTR_ERR(old_dir_folio); goto out_old; } } @@ -1029,15 +994,15 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new_inode) { err = -ENOTEMPTY; - if (old_dir_entry && !f2fs_empty_dir(new_inode)) + if (old_is_dir && !f2fs_empty_dir(new_inode)) goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, - &new_page); + &new_folio); if (!new_entry) { - if (IS_ERR(new_page)) - err = PTR_ERR(new_page); + if (IS_ERR(new_folio)) + err = PTR_ERR(new_folio); goto out_dir; } @@ -1049,12 +1014,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) goto put_out_dir; - f2fs_set_link(new_dir, new_entry, new_page, old_inode); - new_page = NULL; + f2fs_set_link(new_dir, new_entry, new_folio, old_inode); + new_folio = NULL; inode_set_ctime_current(new_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); - if (old_dir_entry) + if (old_is_dir) f2fs_i_links_write(new_inode, false); f2fs_i_links_write(new_inode, false); f2fs_up_write(&F2FS_I(new_inode)->i_sem); @@ -1074,12 +1039,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, goto out_dir; } - if (old_dir_entry) + if (old_is_dir) f2fs_i_links_write(new_dir, true); } f2fs_down_write(&F2FS_I(old_inode)->i_sem); - if (!old_dir_entry || whiteout) + if (!old_is_dir || whiteout) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ @@ -1089,30 +1054,29 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, inode_set_ctime_current(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); - f2fs_delete_entry(old_entry, old_page, old_dir, NULL); - old_page = NULL; + f2fs_delete_entry(old_entry, old_folio, old_dir, NULL); + old_folio = NULL; if (whiteout) { set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); - if (err) + if (err) { + d_invalidate(old_dentry); + d_invalidate(new_dentry); goto put_out_dir; - + } spin_lock(&whiteout->i_lock); - whiteout->i_state &= ~I_LINKABLE; + inode_state_clear(whiteout, I_LINKABLE); spin_unlock(&whiteout->i_lock); iput(whiteout); } - if (old_dir_entry) { - if (old_dir != new_dir && !whiteout) - f2fs_set_link(old_inode, old_dir_entry, - old_dir_page, new_dir); - else - f2fs_put_page(old_dir_page, 0); + if (old_dir_entry) + f2fs_set_link(old_inode, old_dir_entry, old_dir_folio, new_dir); + if (old_is_dir) f2fs_i_links_write(old_dir, false); - } + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); if (S_ISDIR(old_inode->i_mode)) @@ -1130,12 +1094,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, put_out_dir: f2fs_unlock_op(sbi); - f2fs_put_page(new_page, 0); + f2fs_folio_put(new_folio, false); out_dir: if (old_dir_entry) - f2fs_put_page(old_dir_page, 0); + f2fs_folio_put(old_dir_folio, false); out_old: - f2fs_put_page(old_page, 0); + f2fs_folio_put(old_folio, false); out: iput(whiteout); return err; @@ -1147,8 +1111,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct page *old_dir_page, *new_dir_page; - struct page *old_page, *new_page; + struct folio *old_dir_folio, *new_dir_folio; + struct folio *old_folio, *new_folio; struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; @@ -1161,10 +1125,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid)) || - (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + F2FS_I(old_inode)->i_projid)) || + (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(old_dir)->i_projid, - F2FS_I(new_dentry->d_inode)->i_projid))) + F2FS_I(new_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(old_dir); @@ -1176,17 +1140,17 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; err = -ENOENT; - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_folio); if (!old_entry) { - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); + if (IS_ERR(old_folio)) + err = PTR_ERR(old_folio); goto out; } - new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_folio); if (!new_entry) { - if (IS_ERR(new_page)) - err = PTR_ERR(new_page); + if (IS_ERR(new_folio)) + err = PTR_ERR(new_folio); goto out_old; } @@ -1194,20 +1158,20 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir) { if (S_ISDIR(old_inode->i_mode)) { old_dir_entry = f2fs_parent_dir(old_inode, - &old_dir_page); + &old_dir_folio); if (!old_dir_entry) { - if (IS_ERR(old_dir_page)) - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_folio)) + err = PTR_ERR(old_dir_folio); goto out_new; } } if (S_ISDIR(new_inode->i_mode)) { new_dir_entry = f2fs_parent_dir(new_inode, - &new_dir_page); + &new_dir_folio); if (!new_dir_entry) { - if (IS_ERR(new_dir_page)) - err = PTR_ERR(new_dir_page); + if (IS_ERR(new_dir_folio)) + err = PTR_ERR(new_dir_folio); goto out_old_dir; } } @@ -1234,14 +1198,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* update ".." directory entry info of old dentry */ if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + f2fs_set_link(old_inode, old_dir_entry, old_dir_folio, new_dir); /* update ".." directory entry info of new dentry */ if (new_dir_entry) - f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); + f2fs_set_link(new_inode, new_dir_entry, new_dir_folio, old_dir); /* update directory entry info of old dir inode */ - f2fs_set_link(old_dir, old_entry, old_page, new_inode); + f2fs_set_link(old_dir, old_entry, old_folio, new_inode); f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) @@ -1260,7 +1224,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ - f2fs_set_link(new_dir, new_entry, new_page, old_inode); + f2fs_set_link(new_dir, new_entry, new_folio, old_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) @@ -1292,16 +1256,16 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; out_new_dir: if (new_dir_entry) { - f2fs_put_page(new_dir_page, 0); + f2fs_folio_put(new_dir_folio, false); } out_old_dir: if (old_dir_entry) { - f2fs_put_page(old_dir_page, 0); + f2fs_folio_put(old_dir_folio, false); } out_new: - f2fs_put_page(new_page, 0); + f2fs_folio_put(new_folio, false); out_old: - f2fs_put_page(old_page, 0); + f2fs_folio_put(old_folio, false); out: return err; } @@ -1316,40 +1280,46 @@ static int f2fs_rename2(struct mnt_idmap *idmap, if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; + trace_f2fs_rename_start(old_dir, old_dentry, new_dir, new_dentry, + flags); + err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (err) return err; - if (flags & RENAME_EXCHANGE) { - return f2fs_cross_rename(old_dir, old_dentry, - new_dir, new_dentry); - } + if (flags & RENAME_EXCHANGE) + err = f2fs_cross_rename(old_dir, old_dentry, + new_dir, new_dentry); + else /* * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. */ - return f2fs_rename(idmap, old_dir, old_dentry, + err = f2fs_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); + + trace_f2fs_rename_end(old_dentry, new_dentry, flags, err); + return err; } static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; const char *target; if (!dentry) return ERR_PTR(-ECHILD); - page = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + folio = read_mapping_folio(inode->i_mapping, 0, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); - target = fscrypt_get_symlink(inode, page_address(page), + target = fscrypt_get_symlink(inode, folio_address(folio), inode->i_sb->s_blocksize, done); - put_page(page); + folio_put(folio); return target; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ee2e1dd64f25..482a362f2625 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -20,19 +20,24 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; static struct kmem_cache *fsync_node_entry_slab; +static inline bool is_invalid_nid(struct f2fs_sb_info *sbi, nid_t nid) +{ + return nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid; +} + /* * Check whether the given nid is within node id range. */ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { - if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { + if (unlikely(is_invalid_nid(sbi, nid))) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", __func__, nid); @@ -120,25 +125,25 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) return res; } -static void clear_node_page_dirty(struct page *page) +static void clear_node_folio_dirty(struct folio *folio) { - if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page); - clear_page_dirty_for_io(page); - dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); + if (folio_test_dirty(folio)) { + f2fs_clear_page_cache_dirty_tag(folio); + folio_clear_dirty_for_io(folio); + dec_page_count(F2FS_F_SB(folio), F2FS_DIRTY_NODES); } - ClearPageUptodate(page); + folio_clear_uptodate(folio); } -static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); } -static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { - struct page *src_page; - struct page *dst_page; + struct folio *src_folio; + struct folio *dst_folio; pgoff_t dst_off; void *src_addr; void *dst_addr; @@ -147,21 +152,21 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); /* get current nat block page with lock */ - src_page = get_current_nat_page(sbi, nid); - if (IS_ERR(src_page)) - return src_page; - dst_page = f2fs_grab_meta_page(sbi, dst_off); - f2fs_bug_on(sbi, PageDirty(src_page)); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); + src_folio = get_current_nat_folio(sbi, nid); + if (IS_ERR(src_folio)) + return src_folio; + dst_folio = f2fs_grab_meta_folio(sbi, dst_off); + f2fs_bug_on(sbi, folio_test_dirty(src_folio)); + + src_addr = folio_address(src_folio); + dst_addr = folio_address(dst_folio); memcpy(dst_addr, src_addr, PAGE_SIZE); - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); + folio_mark_dirty(dst_folio); + f2fs_folio_put(src_folio, true); set_to_next_nat(nm_i, nid); - return dst_page; + return dst_folio; } static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, @@ -185,7 +190,7 @@ static void __free_nat_entry(struct nat_entry *e) /* must be locked by nat_tree_lock */ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, - struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty) { if (no_fail) f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); @@ -195,6 +200,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + if (init_dirty) { + INIT_LIST_HEAD(&ne->list); + nm_i->nat_cnt[TOTAL_NAT]++; + return ne; + } + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); @@ -204,14 +215,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, return ne; } -static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty) { struct nat_entry *ne; ne = radix_tree_lookup(&nm_i->nat_root, n); - /* for recent accessed nat entry, move it to tail of lru list */ - if (ne && !get_nat_flag(ne, IS_DIRTY)) { + /* + * for recent accessed nat entry which will not be dirtied soon + * later, move it to tail of lru list. + */ + if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) { spin_lock(&nm_i->nat_list_lock); if (!list_empty(&ne->list)) list_move_tail(&ne->list, &nm_i->nat_entries); @@ -256,7 +270,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry *ne, bool init_dirty) { struct nat_entry_set *head; bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; @@ -279,7 +293,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, goto refresh_list; nm_i->nat_cnt[DIRTY_NAT]++; - nm_i->nat_cnt[RECLAIMABLE_NAT]--; + if (!init_dirty) + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -310,10 +325,9 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page) +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { - return NODE_MAPPING(sbi) == page->mapping && - IS_DNODE(page) && is_cold_node(page); + return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -325,7 +339,7 @@ void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) } static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, - struct page *page) + struct folio *folio) { struct fsync_node_entry *fn; unsigned long flags; @@ -334,8 +348,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS, true, NULL); - get_page(page); - fn->page = page; + folio_get(folio); + fn->folio = folio; INIT_LIST_HEAD(&fn->list); spin_lock_irqsave(&sbi->fsync_node_lock, flags); @@ -348,19 +362,19 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, return seq_id; } -void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio) { struct fsync_node_entry *fn; unsigned long flags; spin_lock_irqsave(&sbi->fsync_node_lock, flags); list_for_each_entry(fn, &sbi->fsync_node_list, list) { - if (fn->page == page) { + if (fn->folio == folio) { list_del(&fn->list); sbi->fsync_node_num--; spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); kmem_cache_free(fsync_node_entry_slab, fn); - put_page(page); + folio_put(folio); return; } } @@ -384,7 +398,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) bool need = false; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) @@ -401,7 +415,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) bool is_cp = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; f2fs_up_read(&nm_i->nat_tree_lock); @@ -415,7 +429,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) bool need_update = true; f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ino); + e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) @@ -440,9 +454,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, return; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (!e) - e = __init_nat_entry(nm_i, new, ne, false); + e = __init_nat_entry(nm_i, new, ne, false, false); else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != @@ -459,11 +473,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); + bool init_dirty = false; f2fs_down_write(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, ni->nid); + e = __lookup_nat_cache(nm_i, ni->nid, true); if (!e) { - e = __init_nat_entry(nm_i, new, NULL, true); + init_dirty = true; + e = __init_nat_entry(nm_i, new, NULL, true, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -499,11 +515,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, nat_set_blkaddr(e, new_blkaddr); if (!__is_valid_data_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); - __set_nat_cache_dirty(nm_i, e); + __set_nat_cache_dirty(nm_i, e, init_dirty); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) - e = __lookup_nat_cache(nm_i, ni->ino); + e = __lookup_nat_cache(nm_i, ni->ino, false); if (e) { if (fsync_done && ni->nid == ni->ino) set_nat_flag(e, HAS_FSYNCED_INODE, true); @@ -551,23 +567,28 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_journal *journal = curseg->journal; nid_t start_nid = START_NID(nid); struct f2fs_nat_block *nat_blk; - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; - block_t blkaddr; int i; + bool need_cache = true; + ni->flag = 0; ni->nid = nid; retry: /* Check nat cache */ f2fs_down_read(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); + e = __lookup_nat_cache(nm_i, nid, false); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); f2fs_up_read(&nm_i->nat_tree_lock); + if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) { + need_cache = false; + goto sanity_check; + } return 0; } @@ -593,38 +614,47 @@ retry: up_read(&curseg->journal_rwsem); if (i >= 0) { f2fs_up_read(&nm_i->nat_tree_lock); - goto cache; + goto sanity_check; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); f2fs_up_read(&nm_i->nat_tree_lock); - page = f2fs_get_meta_page(sbi, index); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, index); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = (struct f2fs_nat_block *)page_address(page); + nat_blk = folio_address(folio); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); - f2fs_put_page(page, 1); -cache: - blkaddr = le32_to_cpu(ne.block_addr); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + f2fs_folio_put(folio, true); +sanity_check: + if (__is_valid_data_blkaddr(ni->blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni->blk_addr, + DATA_GENERIC_ENHANCE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "f2fs_get_node_info of %pS: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + __builtin_return_address(0), + ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } /* cache nat entry */ - cache_nat_entry(sbi, nid, &ne); + if (need_cache) + cache_nat_entry(sbi, nid, &ne); return 0; } /* * readahead MAX_RA_NODE number of node pages. */ -static void f2fs_ra_node_pages(struct page *parent, int start, int n) +static void f2fs_ra_node_pages(struct folio *parent, int start, int n) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + struct f2fs_sb_info *sbi = F2FS_F_SB(parent); struct blk_plug plug; int i, end; nid_t nid; @@ -633,7 +663,7 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n) /* Then, try readahead for siblings of the desired node */ end = start + n; - end = min(end, NIDS_PER_BLOCK); + end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { nid = get_nid(parent, i, false); f2fs_ra_node_page(sbi, nid); @@ -753,6 +783,8 @@ got: return level; } +static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start); + /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and @@ -761,8 +793,8 @@ got: int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct page *npage[4]; - struct page *parent = NULL; + struct folio *nfolio[4]; + struct folio *parent = NULL; int offset[4]; unsigned int noffset[4]; nid_t nids[4]; @@ -774,31 +806,42 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) return level; nids[0] = dn->inode->i_ino; - npage[0] = dn->inode_page; - if (!npage[0]) { - npage[0] = f2fs_get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); + if (!dn->inode_folio) { + nfolio[0] = f2fs_get_inode_folio(sbi, nids[0]); + if (IS_ERR(nfolio[0])) + return PTR_ERR(nfolio[0]); + } else { + nfolio[0] = dn->inode_folio; } /* if inline_data is set, should not report any block indices */ if (f2fs_has_inline_data(dn->inode) && index) { err = -ENOENT; - f2fs_put_page(npage[0], 1); + f2fs_folio_put(nfolio[0], true); goto release_out; } - parent = npage[0]; + parent = nfolio[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); - dn->inode_page = npage[0]; - dn->inode_page_locked = true; + dn->inode_folio = nfolio[0]; + dn->inode_folio_locked = true; /* get indirect or direct nodes */ for (i = 1; i <= level; i++) { bool done = false; + if (nids[i] && nids[i] == dn->inode->i_ino) { + err = -EFSCORRUPTED; + f2fs_err_ratelimited(sbi, + "inode mapping table is corrupted, run fsck to fix it, " + "ino:%lu, nid:%u, level:%d, offset:%d", + dn->inode->i_ino, nids[i], level, offset[level]); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto release_pages; + } + if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ if (!f2fs_alloc_nid(sbi, &(nids[i]))) { @@ -807,10 +850,10 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = f2fs_new_node_page(dn, noffset[i]); - if (IS_ERR(npage[i])) { + nfolio[i] = f2fs_new_node_folio(dn, noffset[i]); + if (IS_ERR(nfolio[i])) { f2fs_alloc_nid_failed(sbi, nids[i]); - err = PTR_ERR(npage[i]); + err = PTR_ERR(nfolio[i]); goto release_pages; } @@ -818,66 +861,75 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) f2fs_alloc_nid_done(sbi, nids[i]); done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { - npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); + nfolio[i] = f2fs_get_node_folio_ra(parent, offset[i - 1]); + if (IS_ERR(nfolio[i])) { + err = PTR_ERR(nfolio[i]); goto release_pages; } done = true; } if (i == 1) { - dn->inode_page_locked = false; - unlock_page(parent); + dn->inode_folio_locked = false; + folio_unlock(parent); } else { - f2fs_put_page(parent, 1); + f2fs_folio_put(parent, true); } if (!done) { - npage[i] = f2fs_get_node_page(sbi, nids[i]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); - f2fs_put_page(npage[0], 0); + nfolio[i] = f2fs_get_node_folio(sbi, nids[i], + NODE_TYPE_NON_INODE); + if (IS_ERR(nfolio[i])) { + err = PTR_ERR(nfolio[i]); + f2fs_folio_put(nfolio[0], false); goto release_out; } } if (i < level) { - parent = npage[i]; + parent = nfolio[i]; nids[i + 1] = get_nid(parent, offset[i], false); } } dn->nid = nids[level]; dn->ofs_in_node = offset[level]; - dn->node_page = npage[level]; + dn->node_folio = nfolio[level]; dn->data_blkaddr = f2fs_data_blkaddr(dn); if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node = round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node); if (blkaddr == COMPRESS_ADDR) - blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + blkaddr = data_blkaddr(dn->inode, dn->node_folio, + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; release_pages: - f2fs_put_page(parent, 1); + f2fs_folio_put(parent, true); if (i > 1) - f2fs_put_page(npage[0], 0); + f2fs_folio_put(nfolio[0], false); release_out: - dn->inode_page = NULL; - dn->node_page = NULL; + dn->inode_folio = NULL; + dn->node_folio = NULL; if (err == -ENOENT) { dn->cur_level = i; dn->max_level = level; @@ -897,8 +949,18 @@ static int truncate_node(struct dnode_of_data *dn) if (err) return err; + if (ni.blk_addr != NEW_ADDR && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { + f2fs_err_ratelimited(sbi, + "nat entry is corrupted, run fsck to fix it, ino:%u, " + "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } + /* Deallocate node address */ - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -908,16 +970,16 @@ static int truncate_node(struct dnode_of_data *dn) f2fs_inode_synced(dn->inode); } - clear_node_page_dirty(dn->node_page); + clear_node_folio_dirty(dn->node_folio); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = dn->node_page->index; - f2fs_put_page(dn->node_page, 1); + index = dn->node_folio->index; + f2fs_folio_put(dn->node_folio, true); invalidate_mapping_pages(NODE_MAPPING(sbi), index, index); - dn->node_page = NULL; + dn->node_folio = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); return 0; @@ -926,35 +988,35 @@ static int truncate_node(struct dnode_of_data *dn) static int truncate_dnode(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct page *page; + struct folio *folio; int err; if (dn->nid == 0) return 1; /* get direct node */ - page = f2fs_get_node_page(sbi, dn->nid); - if (PTR_ERR(page) == -ENOENT) + folio = f2fs_get_node_folio(sbi, dn->nid, NODE_TYPE_NON_INODE); + if (PTR_ERR(folio) == -ENOENT) return 1; - else if (IS_ERR(page)) - return PTR_ERR(page); + else if (IS_ERR(folio)) + return PTR_ERR(folio); - if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) { + if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", - dn->inode->i_ino, dn->nid, ino_of_node(page)); + dn->inode->i_ino, dn->nid, ino_of_node(folio)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return -EFSCORRUPTED; } /* Make dnode_of_data for parameter */ - dn->node_page = page; + dn->node_folio = folio; dn->ofs_in_node = 0; f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); err = truncate_node(dn); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -965,7 +1027,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, int ofs, int depth) { struct dnode_of_data rdn = *dn; - struct page *page; + struct folio *folio; struct f2fs_node *rn; nid_t child_nid; unsigned int child_nofs; @@ -977,15 +1039,16 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); - if (IS_ERR(page)) { - trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); - return PTR_ERR(page); + folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid, + NODE_TYPE_NON_INODE); + if (IS_ERR(folio)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio)); + return PTR_ERR(folio); } - f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK); + f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); - rn = F2FS_NODE(page); + rn = F2FS_NODE(folio); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -995,7 +1058,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_dnode(&rdn); if (ret < 0) goto out_err; - if (set_nid(page, i, 0, false)) + if (set_nid(folio, i, 0, false)) dn->node_changed = true; } } else { @@ -1009,7 +1072,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, rdn.nid = child_nid; ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { - if (set_nid(page, i, 0, false)) + if (set_nid(folio, i, 0, false)) dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { @@ -1021,19 +1084,19 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (!ofs) { /* remove current indirect node */ - dn->node_page = page; + dn->node_folio = folio; ret = truncate_node(dn); if (ret) goto out_err; freed++; } else { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } trace_f2fs_truncate_nodes_exit(dn->inode, freed); return freed; out_err: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); trace_f2fs_truncate_nodes_exit(dn->inode, ret); return ret; } @@ -1041,59 +1104,60 @@ out_err: static int truncate_partial_nodes(struct dnode_of_data *dn, struct f2fs_inode *ri, int *offset, int depth) { - struct page *pages[2]; + struct folio *folios[2]; nid_t nid[3]; nid_t child_nid; int err = 0; int i; int idx = depth - 2; - nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + nid[0] = get_nid(dn->inode_folio, offset[0], true); if (!nid[0]) return 0; /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { /* reference count'll be increased */ - pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]); - if (IS_ERR(pages[i])) { - err = PTR_ERR(pages[i]); + folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i], + NODE_TYPE_NON_INODE); + if (IS_ERR(folios[i])) { + err = PTR_ERR(folios[i]); idx = i - 1; goto fail; } - nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + nid[i + 1] = get_nid(folios[i], offset[i + 1], false); } - f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); + f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(pages[idx], i, false); + child_nid = get_nid(folios[idx], i, false); if (!child_nid) continue; dn->nid = child_nid; err = truncate_dnode(dn); if (err < 0) goto fail; - if (set_nid(pages[idx], i, 0, false)) + if (set_nid(folios[idx], i, 0, false)) dn->node_changed = true; } if (offset[idx + 1] == 0) { - dn->node_page = pages[idx]; + dn->node_folio = folios[idx]; dn->nid = nid[idx]; err = truncate_node(dn); if (err) goto fail; } else { - f2fs_put_page(pages[idx], 1); + f2fs_folio_put(folios[idx], true); } offset[idx]++; offset[idx + 1] = 0; idx--; fail: for (i = idx; i >= 0; i--) - f2fs_put_page(pages[i], 1); + f2fs_folio_put(folios[i], true); trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); @@ -1111,26 +1175,33 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) unsigned int nofs = 0; struct f2fs_inode *ri; struct dnode_of_data dn; - struct page *page; + struct folio *folio; trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); - if (level < 0) { + if (level <= 0) { + if (!level) { + level = -EFSCORRUPTED; + f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u", + __func__, inode->i_ino, + from, ADDRS_PER_INODE(inode)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } trace_f2fs_truncate_inode_blocks_exit(inode, level); return level; } - page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); - return PTR_ERR(page); + folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(folio)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(folio)); + return PTR_ERR(folio); } - set_new_dnode(&dn, inode, page, NULL, 0); - unlock_page(page); + set_new_dnode(&dn, inode, folio, NULL, 0); + folio_unlock(folio); - ri = F2FS_INODE(page); + ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -1159,7 +1230,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + dn.nid = get_nid(folio, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1179,23 +1250,30 @@ skip_partial: default: BUG(); } - if (err < 0 && err != -ENOENT) + if (err == -ENOENT) { + set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_err_ratelimited(sbi, + "truncate node fail, ino:%lu, nid:%u, " + "offset[0]:%d, offset[1]:%d, nofs:%d", + inode->i_ino, dn.nid, offset[0], + offset[1], nofs); + err = 0; + } + if (err < 0) goto fail; - if (offset[1] == 0 && - ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { - lock_page(page); - BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true, true); - ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; - set_page_dirty(page); - unlock_page(page); + if (offset[1] == 0 && get_nid(folio, offset[0], true)) { + folio_lock(folio); + BUG_ON(!is_node_folio(folio)); + set_nid(folio, offset[0], 0, true); + folio_unlock(folio); } offset[1] = 0; offset[0]++; nofs += err; } fail: - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); trace_f2fs_truncate_inode_blocks_exit(inode, err); return err > 0 ? 0 : err; } @@ -1206,20 +1284,20 @@ int f2fs_truncate_xattr_node(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; - struct page *npage; + struct folio *nfolio; int err; if (!nid) return 0; - npage = f2fs_get_node_page(sbi, nid); - if (IS_ERR(npage)) - return PTR_ERR(npage); + nfolio = f2fs_get_xnode_folio(sbi, nid); + if (IS_ERR(nfolio)) + return PTR_ERR(nfolio); - set_new_dnode(&dn, inode, NULL, npage, nid); + set_new_dnode(&dn, inode, NULL, nfolio, nid); err = truncate_node(&dn); if (err) { - f2fs_put_page(npage, 1); + f2fs_folio_put(nfolio, true); return err; } @@ -1249,8 +1327,9 @@ int f2fs_remove_inode_page(struct inode *inode) } /* remove potential inline_data blocks */ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) + if (!IS_DEVICE_ALIASING(inode) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ @@ -1275,30 +1354,30 @@ int f2fs_remove_inode_page(struct inode *inode) return 0; } -struct page *f2fs_new_inode_page(struct inode *inode) +struct folio *f2fs_new_inode_folio(struct inode *inode) { struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - /* caller should f2fs_put_page(page, 1); */ - return f2fs_new_node_page(&dn, 0); + /* caller should f2fs_folio_put(folio, true); */ + return f2fs_new_node_folio(&dn, 0); } -struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info new_ni; - struct page *page; + struct folio *folio; int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), dn->nid, false); + if (IS_ERR(folio)) + return folio; if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) goto fail; @@ -1311,8 +1390,14 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) } if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; + dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_warn_ratelimited(sbi, + "f2fs_new_node_folio: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + new_ni.ino, new_ni.nid, new_ni.blk_addr, + new_ni.version, new_ni.flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); goto fail; } #endif @@ -1323,12 +1408,12 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true, true); - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(page, S_ISDIR(dn->inode->i_mode)); - if (!PageUptodate(page)) - SetPageUptodate(page); - if (set_page_dirty(page)) + f2fs_folio_wait_writeback(folio, NODE, true, true); + fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(folio, S_ISDIR(dn->inode->i_mode)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + if (folio_mark_dirty(folio)) dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) @@ -1336,48 +1421,47 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); - return page; - + return folio; fail: - clear_node_page_dirty(page); - f2fs_put_page(page, 1); + clear_node_folio_dirty(folio); + f2fs_folio_put(folio, true); return ERR_PTR(err); } /* * Caller should do after getting the following values. - * 0: f2fs_put_page(page, 0) - * LOCKED_PAGE or error: f2fs_put_page(page, 1) + * 0: f2fs_folio_put(folio, false) + * LOCKED_PAGE or error: f2fs_folio_put(folio, true) */ -static int read_node_page(struct page *page, blk_opf_t op_flags) +static int read_node_folio(struct folio *folio, blk_opf_t op_flags) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, .op = REQ_OP_READ, .op_flags = op_flags, - .page = page, + .folio = folio, .encrypted_page = NULL, }; int err; - if (PageUptodate(page)) { - if (!f2fs_inode_chksum_verify(sbi, page)) { - ClearPageUptodate(page); + if (folio_test_uptodate(folio)) { + if (!f2fs_inode_chksum_verify(sbi, folio)) { + folio_clear_uptodate(folio); return -EFSBADCRC; } return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni, false); + err = f2fs_get_node_info(sbi, folio->index, &ni, false); if (err) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -ENOENT; } @@ -1396,7 +1480,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) */ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { - struct page *apage; + struct folio *afolio; int err; if (!nid) @@ -1404,22 +1488,59 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (f2fs_check_nid_range(sbi, nid)) return; - apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid); - if (apage) + afolio = xa_load(&NODE_MAPPING(sbi)->i_pages, nid); + if (afolio) return; - apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); - if (!apage) + afolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); + if (IS_ERR(afolio)) return; - err = read_node_page(apage, REQ_RAHEAD); - f2fs_put_page(apage, err ? 1 : 0); + err = read_node_folio(afolio, REQ_RAHEAD); + f2fs_folio_put(afolio, err ? true : false); +} + +static int sanity_check_node_footer(struct f2fs_sb_info *sbi, + struct folio *folio, pgoff_t nid, + enum node_type ntype) +{ + if (unlikely(nid != nid_of_node(folio))) + goto out_err; + + switch (ntype) { + case NODE_TYPE_INODE: + if (!IS_INODE(folio)) + goto out_err; + break; + case NODE_TYPE_XATTR: + if (!f2fs_has_xattr_block(ofs_of_node(folio))) + goto out_err; + break; + case NODE_TYPE_NON_INODE: + if (IS_INODE(folio)) + goto out_err; + break; + default: + break; + } + if (time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER)) + goto out_err; + return 0; +out_err: + f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " + "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + ntype, nid, nid_of_node(folio), ino_of_node(folio), + ofs_of_node(folio), cpver_of_node(folio), + next_blkaddr_of_node(folio)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + return -EFSCORRUPTED; } -static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, - struct page *parent, int start) +static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, + struct folio *parent, int start, enum node_type ntype) { - struct page *page; + struct folio *folio; int err; if (!nid) @@ -1427,74 +1548,77 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, if (f2fs_check_nid_range(sbi, nid)) return ERR_PTR(-EINVAL); repeat: - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); + if (IS_ERR(folio)) + return folio; - err = read_node_page(page, 0); - if (err < 0) { + err = read_node_folio(folio, 0); + if (err < 0) goto out_put_err; - } else if (err == LOCKED_PAGE) { - err = 0; + if (err == LOCKED_PAGE) goto page_hit; - } if (parent) f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE); - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_put_page(page, 1); + if (unlikely(!is_node_folio(folio))) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { + if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; - goto out_err; + goto out_put_err; } - if (!f2fs_inode_chksum_verify(sbi, page)) { + if (!f2fs_inode_chksum_verify(sbi, folio)) { err = -EFSBADCRC; goto out_err; } page_hit: - if (likely(nid == nid_of_node(page))) - return page; - - f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); - set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = sanity_check_node_footer(sbi, folio, nid, ntype); + if (!err) + return folio; out_err: - ClearPageUptodate(page); + folio_clear_uptodate(folio); out_put_err: - /* ENOENT comes from read_node_page which is not an error. */ + /* ENOENT comes from read_node_folio which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page->index, NODE); - f2fs_put_page(page, 1); + f2fs_handle_page_eio(sbi, folio, NODE); + f2fs_folio_put(folio, true); return ERR_PTR(err); } -struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, + enum node_type node_type) +{ + return __get_node_folio(sbi, nid, NULL, 0, node_type); +} + +struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino) { - return __get_node_page(sbi, nid, NULL, 0); + return __get_node_folio(sbi, ino, NULL, 0, NODE_TYPE_INODE); } -struct page *f2fs_get_node_page_ra(struct page *parent, int start) +struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + return __get_node_folio(sbi, xnid, NULL, 0, NODE_TYPE_XATTR); +} + +static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_F_SB(parent); nid_t nid = get_nid(parent, start, false); - return __get_node_page(sbi, nid, parent, start); + return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); } static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; - struct page *page; + struct folio *folio; int ret; /* should flush inline_data before evict_inode */ @@ -1502,36 +1626,36 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = f2fs_pagecache_get_page(inode->i_mapping, 0, + folio = f2fs_filemap_get_folio(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); - if (!page) + if (IS_ERR(folio)) goto iput_out; - if (!PageUptodate(page)) - goto page_out; + if (!folio_test_uptodate(folio)) + goto folio_out; - if (!PageDirty(page)) - goto page_out; + if (!folio_test_dirty(folio)) + goto folio_out; - if (!clear_page_dirty_for_io(page)) - goto page_out; + if (!folio_clear_dirty_for_io(folio)) + goto folio_out; - ret = f2fs_write_inline_data(inode, page); + ret = f2fs_write_inline_data(inode, folio); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) - set_page_dirty(page); -page_out: - f2fs_put_page(page, 1); + folio_mark_dirty(folio); +folio_out: + f2fs_folio_put(folio, true); iput_out: iput(inode); } -static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) +static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { pgoff_t index; struct folio_batch fbatch; - struct page *last_page = NULL; + struct folio *last_folio = NULL; int nr_folios; folio_batch_init(&fbatch); @@ -1543,61 +1667,61 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; if (unlikely(f2fs_cp_error(sbi))) { - f2fs_put_page(last_page, 0); + f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); return ERR_PTR(-EIO); } - if (!IS_DNODE(page) || !is_cold_node(page)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; - if (ino_of_node(page) != ino) + if (ino_of_node(folio) != ino) continue; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (ino_of_node(page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; - if (!PageDirty(page)) { + if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } - if (last_page) - f2fs_put_page(last_page, 0); + if (last_folio) + f2fs_folio_put(last_folio, false); - get_page(page); - last_page = page; - unlock_page(page); + folio_get(folio); + last_folio = folio; + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); } - return last_page; + return last_folio; } -static int __write_node_page(struct page *page, bool atomic, bool *submitted, +static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type, unsigned int *seq_id) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, - .ino = ino_of_node(page), + .ino = ino_of_node(folio), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), - .page = page, + .folio = folio, .encrypted_page = NULL, .submitted = 0, .io_type = io_type, @@ -1605,16 +1729,16 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page, NODE); + trace_f2fs_writepage(folio, NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); - return 0; + folio_unlock(folio); + return true; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -1622,30 +1746,25 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(page) && is_cold_node(page)) + IS_DNODE(folio) && is_cold_node(folio)) goto redirty_out; /* get old block addr of this node page */ - nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); + nid = nid_of_node(folio); + f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; - if (wbc->for_reclaim) { - if (!f2fs_down_read_trylock(&sbi->node_write)) - goto redirty_out; - } else { - f2fs_down_read(&sbi->node_write); - } + f2fs_down_read(&sbi->node_write); /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - unlock_page(page); - return 0; + folio_unlock(folio); + return true; } if (__is_valid_data_blkaddr(ni.blk_addr) && @@ -1655,30 +1774,25 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ - if (f2fs_in_warm_node_list(sbi, page)) { - seq = f2fs_add_fsync_node_entry(sbi, page); + if (f2fs_in_warm_node_list(sbi, folio)) { + seq = f2fs_add_fsync_node_entry(sbi, folio); if (seq_id) *seq_id = seq; } - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio)); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); - submitted = NULL; - } - - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, NODE); @@ -1689,14 +1803,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (do_balance) f2fs_balance_fs(sbi, false); - return 0; + return true; redirty_out: - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); + return false; } -int f2fs_move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_folio(struct folio *node_folio, int gc_type) { int err = 0; @@ -1704,43 +1819,33 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 1, - .for_reclaim = 0, }; - f2fs_wait_on_page_writeback(node_page, NODE, true, true); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); - set_page_dirty(node_page); + folio_mark_dirty(node_folio); - if (!clear_page_dirty_for_io(node_page)) { + if (!folio_clear_dirty_for_io(node_folio)) { err = -EAGAIN; goto out_page; } - if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) { + if (!__write_node_folio(node_folio, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) err = -EAGAIN; - unlock_page(node_page); - } goto release_page; } else { /* set page dirty and write it */ - if (!PageWriteback(node_page)) - set_page_dirty(node_page); + if (!folio_test_writeback(node_folio)) + folio_mark_dirty(node_folio); } out_page: - unlock_page(node_page); + folio_unlock(node_folio); release_page: - f2fs_put_page(node_page, 0); + f2fs_folio_put(node_folio, false); return err; } -static int f2fs_write_node_page(struct page *page, - struct writeback_control *wbc) -{ - return __write_node_page(page, false, NULL, wbc, false, - FS_NODE_IO, NULL); -} - int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id) @@ -1748,16 +1853,16 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, pgoff_t index; struct folio_batch fbatch; int ret = 0; - struct page *last_page = NULL; + struct folio *last_folio = NULL; bool marked = false; nid_t ino = inode->i_ino; int nr_folios; int nwritten = 0; if (atomic) { - last_page = last_fsync_dnode(sbi, ino); - if (IS_ERR_OR_NULL(last_page)) - return PTR_ERR_OR_ZERO(last_page); + last_folio = last_fsync_dnode(sbi, ino); + if (IS_ERR_OR_NULL(last_folio)) + return PTR_ERR_OR_ZERO(last_folio); } retry: folio_batch_init(&fbatch); @@ -1769,96 +1874,94 @@ retry: int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { - f2fs_put_page(last_page, 0); + f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); ret = -EIO; goto out; } - if (!IS_DNODE(page) || !is_cold_node(page)) + if (!IS_DNODE(folio) || !is_cold_node(folio)) continue; - if (ino_of_node(page) != ino) + if (ino_of_node(folio) != ino) continue; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (ino_of_node(page) != ino) + if (ino_of_node(folio) != ino) goto continue_unlock; - if (!PageDirty(page) && page != last_page) { + if (!folio_test_dirty(folio) && folio != last_folio) { /* someone wrote it for us */ goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); + set_fsync_mark(folio, 0); + set_dentry_mark(folio, 0); - if (!atomic || page == last_page) { - set_fsync_mark(page, 1); + if (!atomic || folio == last_folio) { + set_fsync_mark(folio, 1); percpu_counter_inc(&sbi->rf_node_block_count); - if (IS_INODE(page)) { + if (IS_INODE(folio)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - f2fs_update_inode(inode, page); - set_dentry_mark(page, + f2fs_update_inode(inode, folio); + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ - if (!PageDirty(page)) - set_page_dirty(page); + if (!folio_test_dirty(folio)) + folio_mark_dirty(folio); } - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - ret = __write_node_page(page, atomic && - page == last_page, + if (!__write_node_folio(folio, atomic && + folio == last_folio, &submitted, wbc, true, - FS_NODE_IO, seq_id); - if (ret) { - unlock_page(page); - f2fs_put_page(last_page, 0); - break; - } else if (submitted) { - nwritten++; + FS_NODE_IO, seq_id)) { + f2fs_folio_put(last_folio, false); + folio_batch_release(&fbatch); + ret = -EIO; + goto out; } + if (submitted) + nwritten++; - if (page == last_page) { - f2fs_put_page(page, 0); + if (folio == last_folio) { + f2fs_folio_put(folio, false); + folio_batch_release(&fbatch); marked = true; - break; + goto out; } } folio_batch_release(&fbatch); cond_resched(); - - if (ret || marked) - break; } - if (!ret && atomic && !marked) { + if (atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); - lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true, true); - set_page_dirty(last_page); - unlock_page(last_page); + ino, last_folio->index); + folio_lock(last_folio); + f2fs_folio_wait_writeback(last_folio, NODE, true, true); + folio_mark_dirty(last_folio); + folio_unlock(last_folio); goto retry; } out: if (nwritten) f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); - return ret ? -EIO : 0; + return ret; } static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) @@ -1885,18 +1988,18 @@ static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) return 1; } -static bool flush_dirty_inode(struct page *page) +static bool flush_dirty_inode(struct folio *folio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct inode *inode; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(folio); inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); if (!inode) return false; - f2fs_update_inode(inode, page); - unlock_page(page); + f2fs_update_inode(inode, folio); + folio_unlock(folio); iput(inode); return true; @@ -1916,32 +2019,27 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; - if (!IS_DNODE(page)) + if (!IS_INODE(folio)) continue; - lock_page(page); - - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { -continue_unlock: - unlock_page(page); - continue; - } + folio_lock(folio); - if (!PageDirty(page)) { - /* someone wrote it for us */ - goto continue_unlock; - } + if (unlikely(!is_node_folio(folio))) + goto unlock; + if (!folio_test_dirty(folio)) + goto unlock; /* flush inline_data, if it's async context. */ - if (page_private_inline(page)) { - clear_page_private_inline(page); - unlock_page(page); - flush_inline_data(sbi, ino_of_node(page)); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); + folio_unlock(folio); + flush_inline_data(sbi, ino_of_node(folio)); continue; } - unlock_page(page); +unlock: + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); @@ -1970,7 +2068,7 @@ next_step: int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; bool submitted = false; /* give a priority to WB_SYNC threads */ @@ -1986,27 +2084,27 @@ next_step: * 1. dentry dnodes * 2. file dnodes */ - if (step == 0 && IS_DNODE(page)) + if (step == 0 && IS_DNODE(folio)) continue; - if (step == 1 && (!IS_DNODE(page) || - is_cold_node(page))) + if (step == 1 && (!IS_DNODE(folio) || + is_cold_node(folio))) continue; - if (step == 2 && (!IS_DNODE(page) || - !is_cold_node(page))) + if (step == 2 && (!IS_DNODE(folio) || + !is_cold_node(folio))) continue; lock_node: if (wbc->sync_mode == WB_SYNC_ALL) - lock_page(page); - else if (!trylock_page(page)) + folio_lock(folio); + else if (!folio_trylock(folio)) continue; - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (!PageDirty(page)) { + if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } @@ -2016,30 +2114,32 @@ continue_unlock: goto write_node; /* flush inline_data */ - if (page_private_inline(page)) { - clear_page_private_inline(page); - unlock_page(page); - flush_inline_data(sbi, ino_of_node(page)); + if (folio_test_f2fs_inline(folio)) { + folio_clear_f2fs_inline(folio); + folio_unlock(folio); + flush_inline_data(sbi, ino_of_node(folio)); goto lock_node; } /* flush dirty inode */ - if (IS_INODE(page) && flush_dirty_inode(page)) + if (IS_INODE(folio) && flush_dirty_inode(folio)) goto lock_node; write_node: - f2fs_wait_on_page_writeback(page, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); + set_fsync_mark(folio, 0); + set_dentry_mark(folio, 0); - ret = __write_node_page(page, false, &submitted, - wbc, do_balance, io_type, NULL); - if (ret) - unlock_page(page); - else if (submitted) + if (!__write_node_folio(folio, false, &submitted, + wbc, do_balance, io_type, NULL)) { + folio_batch_release(&fbatch); + ret = -EIO; + goto out; + } + if (submitted) nwritten++; if (--wbc->nr_to_write == 0) @@ -2074,12 +2174,13 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, unsigned int seq_id) { struct fsync_node_entry *fn; - struct page *page; struct list_head *head = &sbi->fsync_node_list; unsigned long flags; unsigned int cur_seq_id = 0; while (seq_id && cur_seq_id < seq_id) { + struct folio *folio; + spin_lock_irqsave(&sbi->fsync_node_lock, flags); if (list_empty(head)) { spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); @@ -2091,13 +2192,13 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, break; } cur_seq_id = fn->seq_id; - page = fn->page; - get_page(page); + folio = fn->folio; + folio_get(folio); spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - f2fs_wait_on_page_writeback(page, NODE, true, false); + f2fs_folio_wait_writeback(folio, NODE, true, false); - put_page(page); + folio_put(folio); } return filemap_check_errors(NODE_MAPPING(sbi)); @@ -2152,17 +2253,17 @@ skip_write: static bool f2fs_dirty_node_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, NODE); + trace_f2fs_set_page_dirty(folio, NODE); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); #ifdef CONFIG_F2FS_CHECK_FS - if (IS_INODE(&folio->page)) - f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page); + if (IS_INODE(folio)) + f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio); #endif if (filemap_dirty_folio(mapping, folio)) { inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); - set_page_private_reference(&folio->page); + folio_set_f2fs_reference(folio); return true; } return false; @@ -2172,7 +2273,6 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, * Structure of the f2fs node operations */ const struct address_space_operations f2fs_node_aops = { - .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, .dirty_folio = f2fs_dirty_node_folio, .invalidate_folio = f2fs_invalidate_folio, @@ -2234,24 +2334,6 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, } } -bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int i; - bool ret = true; - - f2fs_down_read(&nm_i->nat_tree_lock); - for (i = 0; i < nm_i->nat_blocks; i++) { - if (!test_bit_le(i, nm_i->nat_block_bitmap)) { - ret = false; - break; - } - } - f2fs_up_read(&nm_i->nat_tree_lock); - - return ret; -} - static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set, bool build) { @@ -2283,7 +2365,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *e; struct nat_entry *ne; - int err = -EINVAL; + int err; bool ret = false; /* 0 nid should not be used */ @@ -2297,7 +2379,10 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, i->nid = nid; i->state = FREE_NID; - radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + f2fs_bug_on(sbi, err); + + err = -EINVAL; spin_lock(&nm_i->nid_list_lock); @@ -2316,14 +2401,14 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * - __lookup_nat_cache * - f2fs_add_link * - f2fs_init_inode_metadata - * - f2fs_new_inode_page - * - f2fs_new_node_page + * - f2fs_new_inode_folio + * - f2fs_new_node_folio * - set_node_addr * - f2fs_alloc_nid_done * - __remove_nid_from_list(PREALLOC_NID) * - __insert_nid_to_list(FREE_NID) */ - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, false); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) goto err_out; @@ -2370,10 +2455,9 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static int scan_nat_page(struct f2fs_sb_info *sbi, - struct page *nat_page, nid_t start_nid) + struct f2fs_nat_block *nat_blk, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; @@ -2389,7 +2473,7 @@ static int scan_nat_page(struct f2fs_sb_info *sbi, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); if (blk_addr == NEW_ADDR) - return -EINVAL; + return -EFSCORRUPTED; if (blk_addr == NULL_ADDR) { add_free_nid(sbi, start_nid, true, true); @@ -2493,18 +2577,26 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), nm_i->nat_block_bitmap)) { - struct page *page = get_current_nat_page(sbi, nid); + struct folio *folio = get_current_nat_folio(sbi, nid); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); } else { - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + ret = scan_nat_page(sbi, folio_address(folio), + nid); + f2fs_folio_put(folio, true); } if (ret) { f2fs_up_read(&nm_i->nat_tree_lock); - f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); + + if (ret == -EFSCORRUPTED) { + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, + ERROR_INCONSISTENT_NAT); + } + return ret; } } @@ -2567,6 +2659,16 @@ retry: f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); + + if (unlikely(is_invalid_nid(sbi, i->nid))) { + spin_unlock(&nm_i->nid_list_lock); + f2fs_err(sbi, "Corrupted nid %u in free_nid_list", + i->nid); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_NID); + return false; + } + *nid = i->nid; __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID); @@ -2668,18 +2770,18 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) +int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) { void *src_addr, *dst_addr; size_t inline_size; - struct page *ipage; + struct folio *ifolio; struct f2fs_inode *ri; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - ri = F2FS_INODE(page); + ri = F2FS_INODE(folio); if (ri->i_inline & F2FS_INLINE_XATTR) { if (!f2fs_has_inline_xattr(inode)) { set_inode_flag(inode, FI_INLINE_XATTR); @@ -2693,26 +2795,26 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) goto update_inode; } - dst_addr = inline_xattr_addr(inode, ipage); - src_addr = inline_xattr_addr(inode, page); + dst_addr = inline_xattr_addr(inode, ifolio); + src_addr = inline_xattr_addr(inode, folio); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: - f2fs_update_inode(inode, ipage); - f2fs_put_page(ipage, 1); + f2fs_update_inode(inode, ifolio); + f2fs_folio_put(ifolio, true); return 0; } -int f2fs_recover_xattr_data(struct inode *inode, struct page *page) +int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid; struct dnode_of_data dn; struct node_info ni; - struct page *xpage; + struct folio *xfolio; int err; if (!prev_xnid) @@ -2723,7 +2825,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) if (err) return err; - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -2733,30 +2835,32 @@ recover_xnid: return -ENOSPC; set_new_dnode(&dn, inode, NULL, NULL, new_xnid); - xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); - if (IS_ERR(xpage)) { + xfolio = f2fs_new_node_folio(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xfolio)) { f2fs_alloc_nid_failed(sbi, new_xnid); - return PTR_ERR(xpage); + return PTR_ERR(xfolio); } f2fs_alloc_nid_done(sbi, new_xnid); f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); - - set_page_dirty(xpage); - f2fs_put_page(xpage, 1); + if (folio) { + memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio), + VALID_XATTR_BLOCK_SIZE); + folio_mark_dirty(xfolio); + } + f2fs_folio_put(xfolio, true); return 0; } -int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *src, *dst; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(folio); struct node_info old_ni, new_ni; - struct page *ipage; + struct folio *ifolio; int err; err = f2fs_get_node_info(sbi, ino, &old_ni, false); @@ -2766,8 +2870,8 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; retry: - ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); - if (!ipage) { + ifolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), ino, false); + if (IS_ERR(ifolio)) { memalloc_retry_wait(GFP_NOFS); goto retry; } @@ -2775,13 +2879,13 @@ retry: /* Should not use this inode from free nid list */ remove_free_nid(sbi, ino); - if (!PageUptodate(ipage)) - SetPageUptodate(ipage); - fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(ipage, false); + if (!folio_test_uptodate(ifolio)) + folio_mark_uptodate(ifolio); + fill_node_footer(ifolio, ino, ino, 0, true); + set_cold_node(ifolio, false); - src = F2FS_INODE(page); - dst = F2FS_INODE(ipage); + src = F2FS_INODE(folio); + dst = F2FS_INODE(ifolio); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; @@ -2817,8 +2921,8 @@ retry: WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, true); return 0; } @@ -2831,7 +2935,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, int i, idx, last_offset, nrpages; /* scan the node segment */ - last_offset = sbi->blocks_per_seg; + last_offset = BLKS_PER_SEG(sbi); addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; @@ -2842,17 +2946,17 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); for (idx = addr; idx < addr + nrpages; idx++) { - struct page *page = f2fs_get_tmp_page(sbi, idx); + struct folio *folio = f2fs_get_tmp_folio(sbi, idx); - if (IS_ERR(page)) - return PTR_ERR(page); + if (IS_ERR(folio)) + return PTR_ERR(folio); - rn = F2FS_NODE(page); + rn = F2FS_NODE(folio); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } invalidate_mapping_pages(META_MAPPING(sbi), addr, @@ -2867,6 +2971,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; int i; + bool init_dirty; down_write(&curseg->journal_rwsem); for (i = 0; i < nats_in_cursum(journal); i++) { @@ -2877,12 +2982,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) if (f2fs_check_nid_range(sbi, nid)) continue; + init_dirty = false; + raw_ne = nat_in_journal(journal, i); - ne = __lookup_nat_cache(nm_i, nid); + ne = __lookup_nat_cache(nm_i, nid, true); if (!ne) { + init_dirty = true; ne = __alloc_nat_entry(sbi, nid, true); - __init_nat_entry(nm_i, ne, &raw_ne, true); + __init_nat_entry(nm_i, ne, &raw_ne, true, true); } /* @@ -2897,7 +3005,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); } - __set_nat_cache_dirty(nm_i, ne); + __set_nat_cache_dirty(nm_i, ne, init_dirty); } update_nats_in_cursum(journal, -i); up_write(&curseg->journal_rwsem); @@ -2921,32 +3029,15 @@ add_out: list_add_tail(&nes->set_list, head); } -static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs, - unsigned int valid) -{ - if (valid == 0) { - __set_bit_le(nat_ofs, nm_i->empty_nat_bits); - __clear_bit_le(nat_ofs, nm_i->full_nat_bits); - return; - } - - __clear_bit_le(nat_ofs, nm_i->empty_nat_bits); - if (valid == NAT_ENTRY_PER_BLOCK) - __set_bit_le(nat_ofs, nm_i->full_nat_bits); - else - __clear_bit_le(nat_ofs, nm_i->full_nat_bits); -} - -static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, - struct page *page) +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, + const struct f2fs_nat_block *nat_blk) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; - struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; int i = 0; - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) + if (!enabled_nat_bits(sbi, NULL)) return; if (nat_index == 0) { @@ -2957,36 +3048,17 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } - - __update_nat_bits(nm_i, nat_index, valid); -} - -void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int nat_ofs; - - f2fs_down_read(&nm_i->nat_tree_lock); - - for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { - unsigned int valid = 0, nid_ofs = 0; - - /* handle nid zero due to it should never be used */ - if (unlikely(nat_ofs == 0)) { - valid = 1; - nid_ofs = 1; - } - - for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) { - if (!test_bit_le(nid_ofs, - nm_i->free_nid_bitmap[nat_ofs])) - valid++; - } - - __update_nat_bits(nm_i, nat_ofs, valid); + if (valid == 0) { + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); + return; } - f2fs_up_read(&nm_i->nat_tree_lock); + __clear_bit_le(nat_index, nm_i->empty_nat_bits); + if (valid == NAT_ENTRY_PER_BLOCK) + __set_bit_le(nat_index, nm_i->full_nat_bits); + else + __clear_bit_le(nat_index, nm_i->full_nat_bits); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -2998,25 +3070,25 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, bool to_journal = true; struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; - struct page *page = NULL; + struct folio *folio = NULL; /* * there are two steps to flush nat entries: * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if ((cpc->reason & CP_UMOUNT) || + if (enabled_nat_bits(sbi, cpc) || !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; if (to_journal) { down_write(&curseg->journal_rwsem); } else { - page = get_next_nat_page(sbi, start_nid); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = get_next_nat_folio(sbi, start_nid); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = page_address(page); + nat_blk = folio_address(folio); f2fs_bug_on(sbi, !nat_blk); } @@ -3052,8 +3124,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - update_nat_bits(sbi, start_nid, page); - f2fs_put_page(page, 1); + __update_nat_bits(sbi, start_nid, nat_blk); + f2fs_folio_put(folio, true); } /* Allow dirty nats by node block allocation in write_begin */ @@ -3083,7 +3155,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * during unmount, let's flush nat_bits before checking * nat_cnt[DIRTY_NAT]. */ - if (cpc->reason & CP_UMOUNT) { + if (enabled_nat_bits(sbi, cpc)) { f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); f2fs_up_write(&nm_i->nat_tree_lock); @@ -3099,7 +3171,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (cpc->reason & CP_UMOUNT || + if (enabled_nat_bits(sbi, cpc) || !__has_cursum_space(journal, nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); @@ -3136,40 +3208,38 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) __u64 cp_ver = cur_cp_version(ckpt); block_t nat_bits_addr; + if (!enabled_nat_bits(sbi, NULL)) + return 0; + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, - nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); + F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; - nm_i->full_nat_bits = nm_i->nat_bits + 8; - nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) - return 0; - - nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - + nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { - struct page *page; + struct folio *folio; - page = f2fs_get_meta_page(sbi, nat_bits_addr++); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, nat_bits_addr++); + if (IS_ERR(folio)) + return PTR_ERR(folio); - memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), - page_address(page), F2FS_BLKSIZE); - f2fs_put_page(page, 1); + memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), + folio_address(folio), F2FS_BLKSIZE); + f2fs_folio_put(folio, true); } cp_ver |= (cur_cp_crc(ckpt) << 32); if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { - clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)", - cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits)); + disable_nat_bits(sbi, true); return 0; } + nm_i->full_nat_bits = nm_i->nat_bits + 8; + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } @@ -3180,7 +3250,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) unsigned int i = 0; nid_t nid, last_nid; - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) + if (!enabled_nat_bits(sbi, NULL)) return; for (i = 0; i < nm_i->nat_blocks; i++) { @@ -3252,6 +3322,9 @@ static int init_node_manager(struct f2fs_sb_info *sbi) if (!nm_i->nat_bitmap) return -ENOMEM; + if (!test_opt(sbi, NAT_BITS)) + disable_nat_bits(sbi, true); + err = __get_nat_bitmaps(sbi); if (err) return err; @@ -3392,10 +3465,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) } kvfree(nm_i->free_nid_count); - kvfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bitmap); kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(nm_i->nat_bitmap_mir); + kfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5bd16a95eef8..9cb8dcf8d417 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,7 +31,7 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 -/* control total # of node writes used for roll-fowrad recovery */ +/* control total # of node writes used for roll-forward recovery */ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ @@ -52,6 +52,14 @@ enum { IS_PREALLOC, /* nat entry is preallocated */ }; +/* For node type in __get_node_folio() */ +enum node_type { + NODE_TYPE_REGULAR, + NODE_TYPE_INODE, + NODE_TYPE_XATTR, + NODE_TYPE_NON_INODE, +}; + /* * For node information */ @@ -208,10 +216,10 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) block_addr = (pgoff_t)(nm_i->nat_blkaddr + (block_off << 1) - - (block_off & (sbi->blocks_per_seg - 1))); + (block_off & (BLKS_PER_SEG(sbi) - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - block_addr += sbi->blocks_per_seg; + block_addr += BLKS_PER_SEG(sbi); return block_addr; } @@ -236,41 +244,41 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) #endif } -static inline nid_t ino_of_node(struct page *node_page) +static inline nid_t ino_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.ino); } -static inline nid_t nid_of_node(struct page *node_page) +static inline nid_t nid_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.nid); } -static inline unsigned int ofs_of_node(struct page *node_page) +static inline unsigned int ofs_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } -static inline __u64 cpver_of_node(struct page *node_page) +static inline __u64 cpver_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le64_to_cpu(rn->footer.cp_ver); } -static inline block_t next_blkaddr_of_node(struct page *node_page) +static inline block_t next_blkaddr_of_node(const struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(node_folio); return le32_to_cpu(rn->footer.next_blkaddr); } -static inline void fill_node_footer(struct page *page, nid_t nid, +static inline void fill_node_footer(const struct folio *folio, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int old_flag = 0; if (reset) @@ -286,17 +294,18 @@ static inline void fill_node_footer(struct page *page, nid_t nid, (old_flag & OFFSET_BIT_MASK)); } -static inline void copy_node_footer(struct page *dst, struct page *src) +static inline void copy_node_footer(const struct folio *dst, + const struct folio *src) { struct f2fs_node *src_rn = F2FS_NODE(src); struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } -static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); + struct f2fs_node *rn = F2FS_NODE(folio); __u64 cp_ver = cur_cp_version(ckpt); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) @@ -306,19 +315,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } -static inline bool is_recoverable_dnode(struct page *page) +static inline bool is_recoverable_dnode(const struct folio *folio) { - struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio)); __u64 cp_ver = cur_cp_version(ckpt); /* Don't care crc part, if fsck.f2fs sets it. */ if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) - return (cp_ver << 32) == (cpver_of_node(page) << 32); + return (cp_ver << 32) == (cpver_of_node(folio) << 32); if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); - return cp_ver == cpver_of_node(page); + return cp_ver == cpver_of_node(folio); } /* @@ -342,9 +351,9 @@ static inline bool is_recoverable_dnode(struct page *page) * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ -static inline bool IS_DNODE(struct page *node_page) +static inline bool IS_DNODE(const struct folio *node_folio) { - unsigned int ofs = ofs_of_node(node_page); + unsigned int ofs = ofs_of_node(node_folio); if (f2fs_has_xattr_block(ofs)) return true; @@ -360,22 +369,22 @@ static inline bool IS_DNODE(struct page *node_page) return true; } -static inline int set_nid(struct page *p, int off, nid_t nid, bool i) +static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(folio); - f2fs_wait_on_page_writeback(p, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); else rn->in.nid[off] = cpu_to_le32(nid); - return set_page_dirty(p); + return folio_mark_dirty(folio); } -static inline nid_t get_nid(struct page *p, int off, bool i) +static inline nid_t get_nid(const struct folio *folio, int off, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(folio); if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); @@ -389,19 +398,19 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold data pages in page cache */ -static inline int is_node(struct page *page, int type) +static inline int is_node(const struct folio *folio, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); return le32_to_cpu(rn->footer.flag) & BIT(type); } -#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) -#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) -#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) +#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT) +#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) +#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) -static inline void set_cold_node(struct page *page, bool is_dir) +static inline void set_cold_node(const struct folio *folio, bool is_dir) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) @@ -411,9 +420,9 @@ static inline void set_cold_node(struct page *page, bool is_dir) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_mark(struct page *page, int mark, int type) +static inline void set_mark(struct folio *folio, int mark, int type) { - struct f2fs_node *rn = F2FS_NODE(page); + struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); @@ -422,8 +431,8 @@ static inline void set_mark(struct page *page, int mark, int type) rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_P_SB(page), page); + f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); #endif } -#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) -#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) +#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b8637e88d94f..c3415ebb9f50 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/mm.h> @@ -46,10 +46,6 @@ static struct kmem_cache *fsync_entry_slab; -#if IS_ENABLED(CONFIG_UNICODE) -extern struct kmem_cache *f2fs_cf_name_slab; -#endif - bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -153,26 +149,23 @@ static int init_recovered_filename(const struct inode *dir, if (err) return err; f2fs_hash_filename(dir, fname); -#if IS_ENABLED(CONFIG_UNICODE) /* Case-sensitive match is fine for recovery */ - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; -#endif + f2fs_free_casefolded_name(fname); } else { f2fs_hash_filename(dir, fname); } return 0; } -static int recover_dentry(struct inode *inode, struct page *ipage, +static int recover_dentry(struct inode *inode, struct folio *ifolio, struct list_head *dir_list) { - struct f2fs_inode *raw_inode = F2FS_INODE(ipage); + struct f2fs_inode *raw_inode = F2FS_INODE(ifolio); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct f2fs_filename fname; struct qstr usr_fname; - struct page *page; + struct folio *folio; struct inode *dir, *einode; struct fsync_inode_entry *entry; int err = 0; @@ -194,7 +187,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, if (err) goto out; retry: - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_put; @@ -219,11 +212,11 @@ retry: iput(einode); goto out_put; } - f2fs_delete_entry(de, page, dir, einode); + f2fs_delete_entry(de, folio, dir, einode); iput(einode); goto retry; - } else if (IS_ERR(page)) { - err = PTR_ERR(page); + } else if (IS_ERR(folio)) { + err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, inode->i_ino, inode->i_mode); @@ -233,21 +226,21 @@ retry: goto out; out_put: - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); out: if (file_enc_name(inode)) name = "<encrypted>"; else name = raw_inode->i_name; f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, + __func__, ino_of_node(ifolio), name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } -static int recover_quota_data(struct inode *inode, struct page *page) +static int recover_quota_data(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); struct iattr attr; uid_t i_uid = le32_to_cpu(raw->i_uid); gid_t i_gid = le32_to_cpu(raw->i_gid); @@ -284,15 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static int recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct folio *folio) { - struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode *raw = F2FS_INODE(folio); + struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; inode->i_mode = le16_to_cpu(raw->i_mode); - err = recover_quota_data(inode, page); + err = recover_quota_data(inode, folio); if (err) return err; @@ -309,29 +303,28 @@ static int recover_inode(struct inode *inode, struct page *page) i_projid = (projid_t)le32_to_cpu(raw->i_projid); kprojid = make_kprojid(&init_user_ns, i_projid); - if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + if (!projid_eq(kprojid, fi->i_projid)) { err = f2fs_transfer_project_quota(inode, kprojid); if (err) return err; - F2FS_I(inode)->i_projid = kprojid; + fi->i_projid = kprojid; } } } f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); - inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); + inode_set_atime(inode, le64_to_cpu(raw->i_atime), + le32_to_cpu(raw->i_atime_nsec)); inode_set_ctime(inode, le64_to_cpu(raw->i_ctime), le32_to_cpu(raw->i_ctime_nsec)); - inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), + le32_to_cpu(raw->i_mtime_nsec)); - F2FS_I(inode)->i_advise = raw->i_advise; - F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + fi->i_advise = raw->i_advise; + fi->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(raw->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); @@ -340,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page) if (file_enc_name(inode)) name = "<encrypted>"; else - name = F2FS_INODE(page)->i_name; + name = F2FS_INODE(folio)->i_name; f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + ino_of_node(folio), name, raw->i_inline); return 0; } @@ -354,7 +347,7 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, if (blkaddr + 1 == next_blkaddr) ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, ra_blocks * 2); - else if (next_blkaddr % sbi->blocks_per_seg) + else if (next_blkaddr % BLKS_PER_SEG(sbi)) ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, ra_blocks / 2); return ra_blocks; @@ -365,33 +358,34 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, block_t *blkaddr_fast, bool *is_detecting) { unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; - struct page *page = NULL; int i; if (!*is_detecting) return 0; for (i = 0; i < 2; i++) { + struct folio *folio; + if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) { *is_detecting = false; return 0; } - page = f2fs_get_tmp_page(sbi, *blkaddr_fast); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, *blkaddr_fast); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); *is_detecting = false; return 0; } ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast, - next_blkaddr_of_node(page)); + next_blkaddr_of_node(folio)); - *blkaddr_fast = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + *blkaddr_fast = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks); } @@ -405,10 +399,9 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, - bool check_only) + bool check_only, bool *new_inode) { struct curseg_info *curseg; - struct page *page = NULL; block_t blkaddr, blkaddr_fast; bool is_detecting = true; int err = 0; @@ -420,60 +413,65 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); break; } - if (!is_fsync_dnode(page)) + if (!is_fsync_dnode(folio)) goto next; - entry = get_fsync_inode(head, ino_of_node(page)); + entry = get_fsync_inode(head, ino_of_node(folio)); if (!entry) { bool quota_inode = false; if (!check_only && - IS_INODE(page) && is_dent_dnode(page)) { - err = f2fs_recover_inode_page(sbi, page); + IS_INODE(folio) && + is_dent_dnode(folio)) { + err = f2fs_recover_inode_page(sbi, folio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } quota_inode = true; } - /* - * CP | dnode(F) | inode(DF) - * For this case, we should not give up now. - */ - entry = add_fsync_inode(sbi, head, ino_of_node(page), + entry = add_fsync_inode(sbi, head, ino_of_node(folio), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); - if (err == -ENOENT) + /* + * CP | dnode(F) | inode(DF) + * For this case, we should not give up now. + */ + if (err == -ENOENT) { + if (check_only) + *new_inode = true; goto next; - f2fs_put_page(page, 1); + } + f2fs_folio_put(folio, true); break; } } entry->blkaddr = blkaddr; - if (IS_INODE(page) && is_dent_dnode(page)) + if (IS_INODE(folio) && is_dent_dnode(folio)) entry->last_dentry = blkaddr; next: /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast, &is_detecting); @@ -499,7 +497,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); struct f2fs_summary_block *sum_node; struct f2fs_summary sum; - struct page *sum_page, *node_page; + struct folio *sum_folio, *node_folio; struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; @@ -521,18 +519,18 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } } - sum_page = f2fs_get_sum_page(sbi, segno); - if (IS_ERR(sum_page)) - return PTR_ERR(sum_page); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); + sum_folio = f2fs_get_sum_folio(sbi, segno); + if (IS_ERR(sum_folio)) + return PTR_ERR(sum_folio); + sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno); sum = sum_node->entries[blkoff]; - f2fs_put_page(sum_page, 1); + f2fs_folio_put(sum_folio, true); got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); ofs_in_node = le16_to_cpu(sum.ofs_in_node); - max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode); + max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode); if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", ofs_in_node, dn->inode->i_ino, nid, max_addrs); @@ -542,9 +540,9 @@ got_it: if (dn->inode->i_ino == nid) { tdn.nid = nid; - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - tdn.node_page = dn->inode_page; + if (!dn->inode_folio_locked) + folio_lock(dn->inode_folio); + tdn.node_folio = dn->inode_folio; tdn.ofs_in_node = ofs_in_node; goto truncate_out; } else if (dn->nid == nid) { @@ -553,13 +551,13 @@ got_it: } /* Get the node page */ - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); + node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR); + if (IS_ERR(node_folio)) + return PTR_ERR(node_folio); - offset = ofs_of_node(node_page); - ino = ino_of_node(node_page); - f2fs_put_page(node_page, 1); + offset = ofs_of_node(node_folio); + ino = ino_of_node(node_folio); + f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { int ret; @@ -585,8 +583,8 @@ got_it: * if inode page is locked, unlock temporarily, but its reference * count keeps alive. */ - if (ino == dn->inode->i_ino && dn->inode_page_locked) - unlock_page(dn->inode_page); + if (ino == dn->inode->i_ino && dn->inode_folio_locked) + folio_unlock(dn->inode_folio); set_new_dnode(&tdn, inode, NULL, NULL, 0); if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) @@ -599,40 +597,53 @@ got_it: out: if (ino != dn->inode->i_ino) iput(inode); - else if (dn->inode_page_locked) - lock_page(dn->inode_page); + else if (dn->inode_folio_locked) + folio_lock(dn->inode_folio); return 0; truncate_out: if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); - if (dn->inode->i_ino == nid && !dn->inode_page_locked) - unlock_page(dn->inode_page); + if (dn->inode->i_ino == nid && !dn->inode_folio_locked) + folio_unlock(dn->inode_folio); return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page) + struct folio *folio) { struct dnode_of_data dn; struct node_info ni; - unsigned int start, end; + unsigned int start = 0, end = 0, index; int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(page)) { - err = f2fs_recover_inline_xattr(inode, page); + if (IS_INODE(folio)) { + err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; - } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = f2fs_recover_xattr_data(inode, page); + } else if (f2fs_has_xattr_block(ofs_of_node(folio))) { + err = f2fs_recover_xattr_data(inode, folio); if (!err) recovered++; goto out; } /* step 2: recover inline data */ - err = f2fs_recover_inline_data(inode, page); + err = f2fs_recover_inline_data(inode, folio); if (err) { if (err == 1) err = 0; @@ -640,8 +651,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); - end = start + ADDRS_PER_PAGE(page, inode); + start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode); + end = start + ADDRS_PER_PAGE(folio, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: @@ -654,40 +665,38 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn.node_folio, NODE, true, true); err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(folio)); - if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { + if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); + inode->i_ino, ofs_of_node(dn.node_folio), + ofs_of_node(folio)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; } - for (; start < end; start++, dn.ofs_in_node++) { + for (index = start; index < end; index++, dn.ofs_in_node++) { block_t src, dest; src = f2fs_data_blkaddr(&dn); - dest = data_blkaddr(dn.inode, page, dn.ofs_in_node); + dest = data_blkaddr(dn.inode, folio, dn.ofs_in_node); if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } @@ -702,9 +711,9 @@ retry_dn: } if (!file_keep_isize(inode) && - (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT))) f2fs_i_size_write(inode, - (loff_t)(start + 1) << PAGE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block @@ -712,20 +721,17 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - f2fs_reserve_new_block(&dn); + + err = f2fs_reserve_new_block_retry(&dn); + if (err) + goto err; continue; } /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - err = f2fs_reserve_new_block(&dn); - while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) - err = f2fs_reserve_new_block(&dn); - /* We should not get -ENOSPC */ - f2fs_bug_on(sbi, err); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } @@ -745,8 +751,6 @@ retry_prev: f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", dest, inode->i_ino, dn.ofs_in_node); err = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto err; } @@ -757,16 +761,18 @@ retry_prev: } } - copy_node_footer(dn.node_page, page); - fill_node_footer(dn.node_page, dn.nid, ni.ino, - ofs_of_node(page), false); - set_page_dirty(dn.node_page); + copy_node_footer(dn.node_folio, folio); + fill_node_footer(dn.node_folio, dn.nid, ni.ino, + ofs_of_node(folio), false); + folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); out: - f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), " + "range (%u, %u), recovered = %d, err = %d", + inode->i_ino, nid_of_node(folio), + file_keep_isize(inode) ? "keep" : "recover", + start, end, recovered, err); return err; } @@ -774,10 +780,17 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; - struct page *page = NULL; int err = 0; block_t blkaddr; unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + unsigned int recoverable_dnode = 0; + unsigned int fsynced_dnode = 0; + unsigned int total_dnode = 0; + unsigned int recovered_inode = 0; + unsigned int recovered_dentry = 0; + unsigned int recovered_dnode = 0; + + f2fs_notice(sbi, "do_recover_data: start to recover dnode"); /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -785,89 +798,101 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(folio)) { + f2fs_folio_put(folio, true); break; } + recoverable_dnode++; - entry = get_fsync_inode(inode_list, ino_of_node(page)); + entry = get_fsync_inode(inode_list, ino_of_node(folio)); if (!entry) goto next; + fsynced_dnode++; /* * inode(x) | CP | inode(x) | dnode(F) * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); + if (IS_INODE(folio)) { + err = recover_inode(entry->inode, folio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } + recovered_inode++; } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, page, dir_list); + err = recover_dentry(entry->inode, folio, dir_list); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } + recovered_dentry++; } - err = do_recover_data(sbi, entry->inode, page); + err = do_recover_data(sbi, entry->inode, folio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } + recovered_dnode++; if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); next: ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, - next_blkaddr_of_node(page)); + next_blkaddr_of_node(folio)); /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + total_dnode++; } if (!err) - f2fs_allocate_new_segments(sbi); + err = f2fs_allocate_new_segments(sbi); + + f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, " + "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d", + recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode, + recovered_dentry, recovered_dnode, err); return err; } int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list, tmp_inode_list; - struct list_head dir_list; + LIST_HEAD(inode_list); + LIST_HEAD(tmp_inode_list); + LIST_HEAD(dir_list); int err; int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; - bool fix_curseg_write_pointer = false; + bool new_inode = false; + + f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, " + "check_only: %d", check_only); if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); - INIT_LIST_HEAD(&inode_list); - INIT_LIST_HEAD(&tmp_inode_list); - INIT_LIST_HEAD(&dir_list); - /* prevent checkpoint */ f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list, check_only); - if (err || list_empty(&inode_list)) + err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode); + if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode))) goto skip; if (check_only) { @@ -884,8 +909,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) else f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); skip: - fix_curseg_write_pointer = !check_only || list_empty(&inode_list); - destroy_fsync_dnodes(&inode_list, err); destroy_fsync_dnodes(&tmp_inode_list, err); @@ -903,11 +926,8 @@ skip: * and the f2fs is not read only, check and fix zoned block devices' * write pointer consistency. */ - if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && - f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_fix_curseg_write_pointer(sbi); - ret = err; - } + if (!err) + err = f2fs_check_and_fix_write_pointer(sbi); if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); @@ -924,6 +944,7 @@ skip: struct cp_control cpc = { .reason = CP_RECOVERY, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0457d620011f..c26424f47686 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,19 +192,33 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* + * The vfs inode keeps clean during commit, but the f2fs inode + * doesn't. So clear the dirty state after commit and let + * f2fs_mark_inode_dirty_sync ensure a consistent dirty state. + */ + f2fs_inode_synced(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } + /* avoid stale dirty inode during eviction */ + sync_inode_metadata(inode, 0); } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@ -220,7 +234,7 @@ retry: err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + memalloc_retry_wait(GFP_NOFS); goto retry; } return err; @@ -237,7 +251,7 @@ retry: if (!__is_valid_data_blkaddr(new_addr)) { if (new_addr == NULL_ADDR) dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1); f2fs_update_data_blkaddr(&dn, new_addr); } else { f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -246,7 +260,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; @@ -320,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode) goto next; } - blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode), + blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode), len); index = off; for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { @@ -332,8 +346,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode) DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } @@ -359,11 +371,24 @@ next: } out: + if (time_to_inject(sbi, FAULT_TIMEOUT)) + f2fs_io_schedule_timeout_killable(DEFAULT_FAULT_TIMEOUT); + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + + /* + * inode may has no FI_ATOMIC_DIRTIED flag due to no write + * before commit. + */ + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + /* clear atomic dirty status and set vfs dirty status */ + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); @@ -398,6 +423,9 @@ int f2fs_commit_atomic_write(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { + if (f2fs_cp_error(sbi)) + return; + if (time_to_inject(sbi, FAULT_CHECKPOINT)) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT); @@ -405,7 +433,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi, false); - if (!f2fs_is_checkpoint_ready(sbi)) + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return; /* @@ -427,12 +455,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } else { struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, - .init_gc_type = BG_GC, + .init_gc_type = f2fs_sb_has_blkzoned(sbi) ? + FG_GC : BG_GC, .no_bg_gc = true, .should_migrate_blocks = false, .err_gc_skipped = false, .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); f2fs_gc(sbi, &gc_control); } } @@ -445,8 +475,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); unsigned int meta = get_pages(sbi, F2FS_DIRTY_META); unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA); - unsigned int threshold = sbi->blocks_per_seg * factor * - DEFAULT_DIRTY_THRESHOLD; + unsigned int threshold = + SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD)); unsigned int global_threshold = threshold * 3 / 2; if (dents >= threshold || qdata >= threshold || @@ -510,8 +540,8 @@ do_sync: mutex_unlock(&sbi->flush_lock); } + stat_inc_cp_call_count(sbi, BACKGROUND); f2fs_sync_fs(sbi->sb, 1); - stat_inc_bg_cp_count(sbi->stat_info); } static int __submit_flush_wait(struct f2fs_sb_info *sbi, @@ -720,7 +750,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) do { ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); } while (ret && --count); if (ret) { @@ -743,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); /* need not be added */ - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) return; if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) @@ -765,10 +795,12 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, block_t valid_blocks = get_valid_blocks(sbi, segno, true); - f2fs_bug_on(sbi, unlikely(!valid_blocks || - valid_blocks == CAP_BLKS_PER_SEC(sbi))); + f2fs_bug_on(sbi, + (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !valid_blocks) || + valid_blocks == CAP_BLKS_PER_SEC(sbi)); - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -807,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, return; } - if (!IS_CURSEC(sbi, secno)) + if (!is_cursec(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); } } @@ -824,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) unsigned short valid_blocks, ckpt_valid_blocks; unsigned int usable_blocks; - if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + if (segno == NULL_SEGNO || is_curseg(sbi, segno)) return; usable_blocks = f2fs_usable_blks_in_seg(sbi, segno); @@ -857,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { if (get_valid_blocks(sbi, segno, false)) continue; - if (IS_CURSEG(sbi, segno)) + if (is_curseg(sbi, segno)) continue; __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); @@ -869,7 +901,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); - block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs); struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); block_t holes[2] = {0, 0}; /* DATA and NODE */ block_t unusable; @@ -898,11 +930,16 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); + + if (F2FS_OPTION(sbi).unusable_cap_perc == 100) + return 0; if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; + if (has_not_enough_free_secs(sbi, 0, 0)) + return -EAGAIN; return 0; } @@ -1098,9 +1135,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - printk_ratelimited( - "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, sbi->sb->s_id, + f2fs_info_ratelimited(sbi, + "Issue discard(%u, %u, %u) failed, ret: %d", dc->di.lstart, dc->di.start, dc->di.len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1129,8 +1165,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, struct seg_entry *sentry; unsigned int segno; block_t blk = start; - unsigned long offset, size, max_blocks = sbi->blocks_per_seg; - unsigned long *map; + unsigned long offset, size, *map; while (blk < end) { segno = GET_SEGNO(sbi, blk); @@ -1140,7 +1175,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, if (end < START_BLOCK(sbi, segno + 1)) size = GET_BLKOFF_FROM_SEG0(sbi, end); else - size = max_blocks; + size = BLKS_PER_SEG(sbi); map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); @@ -1169,7 +1204,10 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->min_interval = dcc->min_discard_issue_time; dpolicy->mid_interval = dcc->mid_discard_issue_time; dpolicy->max_interval = dcc->max_discard_issue_time; - dpolicy->io_aware = true; + if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE) + dpolicy->io_aware = true; + else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE) + dpolicy->io_aware = false; dpolicy->sync = false; dpolicy->ordered = true; if (utilization(sbi) > dcc->discard_urgent_util) { @@ -1258,11 +1296,28 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) { - __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued); - return 0; + int devi = f2fs_bdev_index(sbi, bdev); + + if (devi < 0) + return -EINVAL; + + if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) { + __submit_zone_reset_cmd(sbi, dc, flag, + wait_list, issued); + return 0; + } } #endif + /* + * stop issuing discard for any of below cases: + * 1. device is conventional zone, but it doesn't support discard. + * 2. device is regulare device, after snapshot it doesn't support + * discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len); lstart = dc->di.lstart; @@ -1288,15 +1343,9 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->di.len += len; + err = 0; if (time_to_inject(sbi, FAULT_DISCARD)) { err = -EIO; - } else { - err = __blkdev_issue_discard(bdev, - SECTOR_FROM_BLOCK(start), - SECTOR_FROM_BLOCK(len), - GFP_NOFS, &bio); - } - if (err) { spin_lock_irqsave(&dc->lock, flags); if (dc->state == D_PARTIAL) dc->state = D_SUBMIT; @@ -1305,6 +1354,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, break; } + __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), + SECTOR_FROM_BLOCK(len), GFP_NOFS, &bio); f2fs_bug_on(sbi, !bio); /* @@ -1369,7 +1420,8 @@ static void __insert_discard_cmd(struct f2fs_sb_info *sbi, p = &(*p)->rb_right; leftmost = false; } else { - f2fs_bug_on(sbi, 1); + /* Let's skip to add, if exists */ + return; } } @@ -1785,15 +1837,24 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) dc = __lookup_discard_cmd(sbi, blkaddr); #ifdef CONFIG_BLK_DEV_ZONED if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { - /* force submit zone reset */ - if (dc->state == D_PREP) - __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, - &dcc->wait_list, NULL); - dc->ref++; - mutex_unlock(&dcc->cmd_lock); - /* wait zone reset */ - __wait_one_discard_bio(sbi, dc); - return; + int devi = f2fs_bdev_index(sbi, dc->bdev); + + if (devi < 0) { + mutex_unlock(&dcc->cmd_lock); + return; + } + + if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) { + /* force submit zone reset */ + if (dc->state == D_PREP) + __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, + &dcc->wait_list, NULL); + dc->ref++; + mutex_unlock(&dcc->cmd_lock); + /* wait zone reset */ + __wait_one_discard_bio(sbi, dc); + return; + } } #endif if (dc) { @@ -1863,9 +1924,8 @@ static int issue_discard_thread(void *data) set_freezable(); do { - wait_event_interruptible_timeout(*q, - kthread_should_stop() || freezing(current) || - dcc->discard_wake, + wait_event_freezable_timeout(*q, + kthread_should_stop() || dcc->discard_wake, msecs_to_jiffies(wait_ms)); if (sbi->gc_mode == GC_URGENT_HIGH || @@ -1883,8 +1943,6 @@ static int issue_discard_thread(void *data) if (atomic_read(&dcc->queued_discard)) __wait_all_discard_cmd(sbi, NULL); - if (try_to_freeze()) - continue; if (f2fs_readonly(sbi->sb)) continue; if (kthread_should_stop()) @@ -1950,9 +2008,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { + unsigned int nofs_flags; + int ret; + trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sector, nr_sects, GFP_NOFS); + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects); + memalloc_nofs_restore(nofs_flags); + return ret; } __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); @@ -2021,7 +2085,6 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; @@ -2033,12 +2096,15 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) || - !f2fs_block_unit_discard(sbi)) + if (se->valid_blocks == BLKS_PER_SEG(sbi) || + !f2fs_hw_support_discard(sbi) || + !f2fs_block_unit_discard(sbi)) return false; if (!force) { - if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || + (!se->valid_blocks && + !is_curseg(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -2051,13 +2117,14 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, while (force || SM_I(sbi)->dcc_info->nr_discards <= SM_I(sbi)->dcc_info->max_discards) { - start = __find_rev_next_bit(dmap, max_blocks, end + 1); - if (start >= max_blocks) + start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1); + if (start >= BLKS_PER_SEG(sbi)) break; - end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - if (force && start && end != max_blocks - && (end - start) < cpc->trim_minlen) + end = __find_rev_next_zero_bit(dmap, + BLKS_PER_SEG(sbi), start + 1); + if (force && start && end != BLKS_PER_SEG(sbi) && + (end - start) < cpc->trim_minlen) continue; if (check_only) @@ -2139,8 +2206,8 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, start + 1); if (section_alignment) { - start = rounddown(start, sbi->segs_per_sec); - end = roundup(end, sbi->segs_per_sec); + start = rounddown(start, SEGS_PER_SEC(sbi)); + end = roundup(end, SEGS_PER_SEC(sbi)); } for (i = start; i < end; i++) { @@ -2159,18 +2226,18 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, if (!f2fs_sb_has_blkzoned(sbi) && (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), - (end - start) << sbi->log_blocks_per_seg); + SEGS_TO_BLKS(sbi, end - start)); continue; } next: secno = GET_SEC_FROM_SEG(sbi, start); start_segno = GET_SEG_FROM_SEC(sbi, secno); - if (!IS_CURSEC(sbi, secno) && + if (!is_cursec(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), - sbi->segs_per_sec << sbi->log_blocks_per_seg); + BLKS_PER_SEC(sbi)); - start = start_segno + sbi->segs_per_sec; + start = start_segno + SEGS_PER_SEC(sbi); if (start < end) goto next; else @@ -2189,11 +2256,11 @@ next: find_next: if (is_valid) { next_pos = find_next_zero_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || - !force || len < cpc->trim_minlen) + (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, @@ -2201,13 +2268,13 @@ find_next: total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); } skip: cur_pos = next_pos; is_valid = !is_valid; - if (cur_pos < sbi->blocks_per_seg) + if (cur_pos < BLKS_PER_SEG(sbi)) goto find_next; release_discard_addr(entry); @@ -2224,6 +2291,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; int err = 0; + if (f2fs_sb_has_readonly(sbi)) { + f2fs_info(sbi, + "Skip to start discard thread for readonly image"); + return 0; + } + if (!f2fs_realtime_discard_enable(sbi)) return 0; @@ -2254,10 +2327,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->discard_io_aware_gran = MAX_PLIST_NUM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; - if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) - dcc->discard_granularity = sbi->blocks_per_seg; - else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) - dcc->discard_granularity = BLKS_PER_SEC(sbi); + dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE; + if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT || + F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) + dcc->discard_granularity = BLKS_PER_SEG(sbi); INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) @@ -2269,7 +2342,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi)); dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; @@ -2366,76 +2439,38 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +/* + * NOTE: when updating multiple blocks at the same time, please ensure + * that the consecutive input blocks belong to the same segment. + */ +static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, + unsigned int segno, block_t blkaddr, unsigned int offset, int del) { - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + int i; + int del_count = -del; - segno = GET_SEGNO(sbi, blkaddr); - - se = get_seg_entry(sbi, segno); - new_vblocks = se->valid_blocks + del; - offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - - f2fs_bug_on(sbi, (new_vblocks < 0 || - (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); - - se->valid_blocks = new_vblocks; - - /* Update valid block bitmap */ - if (del > 0) { - exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(exist)) { - f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks--; - del = 0; - } - - if (f2fs_block_unit_discard(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) - sbi->discard_blks--; + f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); - /* - * SSR should never reuse block which is checkpointed - * or newly invalidated. - */ - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { - if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks++; - } - } else { - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); + for (i = 0; i < del_count; i++) { + exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, + mir_exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + blkaddr + i, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", - blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); f2fs_bug_on(sbi, 1); se->valid_blocks++; - del = 0; + del += 1; } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { /* * If checkpoints are off, we must not reuse data that @@ -2443,7 +2478,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) * before, we must track that to know how much space we * really have. */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { spin_lock(&sbi->stat_lock); sbi->unusable_block_count++; spin_unlock(&sbi->stat_lock); @@ -2451,11 +2486,105 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) } if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + f2fs_test_and_clear_bit(offset + i, se->discard_map)) sbi->discard_blks++; + + if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) { + se->ckpt_valid_blocks -= 1; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks -= 1; + } + } + + if (__is_large_section(sbi)) + sanity_check_valid_blocks(sbi, segno); + + return del; +} + +static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, + unsigned int segno, block_t blkaddr, unsigned int offset, int del) +{ + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif + + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; } - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) { + se->ckpt_valid_blocks++; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks++; + } + } + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) { se->ckpt_valid_blocks += del; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks += del; + } + + if (__is_large_section(sbi)) + sanity_check_valid_blocks(sbi, segno); + + return del; +} + +/* + * If releasing blocks, this function supports updating multiple consecutive blocks + * at one time, but please note that these consecutive blocks need to belong to the + * same segment. + */ +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + + segno = GET_SEGNO(sbi, blkaddr); + if (segno == NULL_SEGNO) + return; + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + f2fs_bug_on(sbi, (new_vblocks < 0 || + (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); + + se->valid_blocks = new_vblocks; + + /* Update valid block bitmap */ + if (del > 0) { + del = update_sit_entry_for_alloc(sbi, se, segno, blkaddr, offset, del); + } else { + del = update_sit_entry_for_release(sbi, se, segno, blkaddr, offset, del); + } __mark_sit_entry_dirty(sbi, segno); @@ -2466,26 +2595,43 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); + block_t addr_start = addr, addr_end = addr + len - 1; + unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; + unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); - f2fs_invalidate_compress_page(sbi, addr); + f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); - update_segment_mtime(sbi, addr, 0); - update_sit_entry(sbi, addr, -1); + if (seg_num == 1) + cnt = len; + else + cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); + + do { + update_segment_mtime(sbi, addr_start, 0); + update_sit_entry(sbi, addr_start, -cnt); - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + /* update @addr_start and @cnt and @segno */ + addr_start = START_BLOCK(sbi, ++segno); + if (++i == seg_num) + cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; + else + cnt = max_blocks; + } while (i <= seg_num); up_write(&sit_i->sentry_lock); } @@ -2519,7 +2665,7 @@ static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int typ struct curseg_info *curseg = CURSEG_I(sbi, type); if (sbi->ckpt->alloc_type[type] == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); return curseg->next_blkoff; } @@ -2550,40 +2696,60 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) } /* - * Caller should put this summary page + * Caller should put this summary folio */ -struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno) { if (unlikely(f2fs_cp_error(sbi))) return ERR_PTR(-EIO); - return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_folio_retry(sbi, GET_SUM_BLOCK(sbi, segno)); } void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); + struct folio *folio; - memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_put_page(page, 1); + if (SUMS_PER_BLOCK == 1) + folio = f2fs_grab_meta_folio(sbi, blk_addr); + else + folio = f2fs_get_meta_folio_retry(sbi, blk_addr); + + if (IS_ERR(folio)) + return; + + memcpy(folio_address(folio), src, PAGE_SIZE); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) + struct f2fs_summary_block *sum_blk, unsigned int segno) { - f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr); + struct folio *folio; + + if (SUMS_PER_BLOCK == 1) + return f2fs_update_meta_page(sbi, (void *)sum_blk, + GET_SUM_BLOCK(sbi, segno)); + + folio = f2fs_get_sum_folio(sbi, segno); + if (IS_ERR(folio)) + return; + + memcpy(SUM_BLK_PAGE_ADDR(folio, segno), sum_blk, sizeof(*sum_blk)); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static void write_current_sum_page(struct f2fs_sb_info *sbi, int type, block_t blk_addr) { struct curseg_info *curseg = CURSEG_I(sbi, type); - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); + struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); struct f2fs_summary_block *src = curseg->sum_blk; struct f2fs_summary_block *dst; - dst = (struct f2fs_summary_block *)page_address(page); + dst = folio_address(folio); memset(dst, 0, PAGE_SIZE); mutex_lock(&curseg->curseg_mutex); @@ -2597,17 +2763,17 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static int is_next_segment_free(struct f2fs_sb_info *sbi, - struct curseg_info *curseg, int type) + struct curseg_info *curseg) { unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi)) return !test_bit(segno, free_i->free_segmap); return 0; } @@ -2616,54 +2782,93 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG */ -static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir) +static int get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, bool pinning) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); - unsigned int left_start = hint; + unsigned int alloc_policy = sbi->allocate_section_policy; + unsigned int alloc_hint = sbi->allocate_section_hint; bool init = true; - int go_left = 0; int i; + int ret = 0; spin_lock(&free_i->segmap_lock); - if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + if (time_to_inject(sbi, FAULT_NO_SEGMENT)) { + ret = -ENOSPC; + goto out_unlock; + } + + if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) { segno = find_next_zero_bit(free_i->free_segmap, GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } + +#ifdef CONFIG_BLK_DEV_ZONED + /* + * If we format f2fs on zoned storage, let's try to get pinned sections + * from beginning of the storage, which should be a conventional one. + */ + if (f2fs_sb_has_blkzoned(sbi)) { + /* Prioritize writing to conventional zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) + segno = 0; + else + segno = max(sbi->first_seq_zone_segno, *newseg); + hint = GET_SEC_FROM_SEG(sbi, segno); + } +#endif + + /* + * Prevent allocate_section_hint from exceeding MAIN_SECS() + * due to desynchronization. + */ + if (alloc_policy != ALLOCATE_FORWARD_NOHINT && + alloc_hint > MAIN_SECS(sbi)) + alloc_hint = MAIN_SECS(sbi); + + if (alloc_policy == ALLOCATE_FORWARD_FROM_HINT && + hint < alloc_hint) + hint = alloc_hint; + else if (alloc_policy == ALLOCATE_FORWARD_WITHIN_HINT && + hint >= alloc_hint) + hint = 0; + find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); - if (secno >= MAIN_SECS(sbi)) { - if (dir == ALLOC_RIGHT) { + +#ifdef CONFIG_BLK_DEV_ZONED + if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { + /* Write only to sequential zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { + hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno); + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + } else secno = find_first_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi)); - f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); - } else { - go_left = 1; - left_start = hint - 1; + MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, 1); + goto out_unlock; } } - if (go_left == 0) - goto skip_left; +#endif - while (test_bit(left_start, free_i->free_secmap)) { - if (left_start > 0) { - left_start--; - continue; - } - left_start = find_first_zero_bit(free_i->free_secmap, + if (secno >= MAIN_SECS(sbi)) { + secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); - f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); - break; + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, !pinning); + goto out_unlock; + } } - secno = left_start; -skip_left: segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2674,21 +2879,13 @@ skip_left: goto got_it; if (zoneno == old_zoneno) goto got_it; - if (dir == ALLOC_LEFT) { - if (!go_left && zoneno + 1 >= total_zones) - goto got_it; - if (go_left && zoneno == 0) - goto got_it; - } for (i = 0; i < NR_CURSEG_TYPE; i++) if (CURSEG_I(sbi, i)->zone == zoneno) break; if (i < NR_CURSEG_TYPE) { /* zone is in user, try another */ - if (go_left) - hint = zoneno * sbi->secs_per_zone - 1; - else if (zoneno + 1 >= total_zones) + if (zoneno + 1 >= total_zones) hint = 0; else hint = (zoneno + 1) * sbi->secs_per_zone; @@ -2697,10 +2894,26 @@ skip_left: } got_it: /* set it as dirty segment in free segmap */ - f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); + if (test_bit(segno, free_i->free_segmap)) { + ret = -EFSCORRUPTED; + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP); + goto out_unlock; + } + + /* no free section in conventional device or conventional zone */ + if (new_sec && pinning && + f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) { + ret = -EAGAIN; + goto out_unlock; + } __set_inuse(sbi, segno); *newseg = segno; +out_unlock: spin_unlock(&free_i->segmap_lock); + + if (ret == -ENOSPC && !pinning) + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); + return ret; } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -2709,6 +2922,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; unsigned short seg_type = curseg->seg_type; + /* only happen when get_new_segment() fails */ + if (curseg->next_segno == NULL_SEGNO) + return; + curseg->inited = true; curseg->segno = curseg->next_segno; curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); @@ -2733,12 +2950,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); - if (f2fs_need_rand_seg(sbi)) - return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); + if (__is_large_section(sbi)) { + if (f2fs_need_rand_seg(sbi)) { + unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno); - /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (__is_large_section(sbi)) + if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint) + return curseg->segno; + return get_random_u32_inclusive(curseg->segno + 1, + GET_SEG_FROM_SEC(sbi, hint + 1) - 1); + } return curseg->segno; + } else if (f2fs_need_rand_seg(sbi)) { + return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + } /* inmem log may not locate on any segment after mount */ if (!curseg->inited) @@ -2747,8 +2971,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; - if (test_opt(sbi, NOHEAP) && - (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))) + if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) @@ -2761,34 +2984,42 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) return curseg->segno; } +static void reset_curseg_fields(struct curseg_info *curseg) +{ + curseg->inited = false; + curseg->segno = NULL_SEGNO; + curseg->next_segno = 0; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. */ -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned short seg_type = curseg->seg_type; unsigned int segno = curseg->segno; - int dir = ALLOC_LEFT; + bool pinning = type == CURSEG_COLD_DATA_PINNED; + int ret; if (curseg->inited) - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, segno)); - if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) - dir = ALLOC_RIGHT; - - if (test_opt(sbi, NOHEAP)) - dir = ALLOC_RIGHT; + write_sum_page(sbi, curseg->sum_blk, segno); segno = __get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec, dir); + ret = get_new_segment(sbi, &segno, new_sec, pinning); + if (ret) { + if (ret == -ENOSPC) + reset_curseg_fields(curseg); + return ret; + } + curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); + return 0; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -2804,7 +3035,7 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start); } static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, @@ -2815,22 +3046,23 @@ static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) { - return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; + return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi); } /* * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type) +static int change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int new_segno = curseg->next_segno; struct f2fs_summary_block *sum_node; - struct page *sum_page; + struct folio *sum_folio; - write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); + if (curseg->inited) + write_sum_page(sbi, curseg->sum_blk, curseg->segno); __set_test_and_inuse(sbi, new_segno); @@ -2843,25 +3075,27 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = SSR; curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); - sum_page = f2fs_get_sum_page(sbi, new_segno); - if (IS_ERR(sum_page)) { + sum_folio = f2fs_get_sum_folio(sbi, new_segno); + if (IS_ERR(sum_folio)) { /* GC won't be able to use stale summary pages by cp_error */ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); - return; + return PTR_ERR(sum_folio); } - sum_node = (struct f2fs_summary_block *)page_address(sum_page); + sum_node = SUM_BLK_PAGE_ADDR(sum_folio, new_segno); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); + f2fs_folio_put(sum_folio, true); + return 0; } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age); -static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, +static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, int target_type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); + int ret = 0; curseg->seg_type = target_type; @@ -2869,38 +3103,62 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; - change_curseg(sbi, type); + ret = change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); } stat_inc_seg_type(sbi, curseg); + return ret; } -static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) +static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); + int ret = 0; - if (!sbi->am.atgc_enabled) - return; + if (!sbi->am.atgc_enabled && !force) + return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); - get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); + ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, + CURSEG_COLD_DATA, SSR, 0); up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; +} +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) +{ + return __f2fs_init_atgc_curseg(sbi, false); } -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) + +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi) { - __f2fs_init_atgc_curseg(sbi); + int ret; + + if (!test_opt(sbi, ATGC)) + return 0; + if (sbi->am.atgc_enabled) + return 0; + if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) < + sbi->am.age_threshold) + return 0; + + ret = __f2fs_init_atgc_curseg(sbi, true); + if (!ret) { + sbi->am.atgc_enabled = true; + f2fs_info(sbi, "reenabled age threshold GC"); + } + return ret; } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) @@ -2912,8 +3170,7 @@ static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) goto out; if (get_valid_blocks(sbi, curseg->segno, false)) { - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, curseg->segno); } else { mutex_lock(&DIRTY_I(sbi)->seglist_lock); __set_test_and_free(sbi, curseg->segno, true); @@ -2968,7 +3225,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -2995,7 +3253,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3019,8 +3278,7 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && curseg->seg_type == CURSEG_WARM_NODE) return true; - if (curseg->alloc_type == LFS && - is_next_segment_free(sbi, curseg, type) && + if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) @@ -3028,11 +3286,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) return false; } -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); @@ -3043,9 +3302,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type); + ret = change_curseg(sbi, type); else - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); stat_inc_seg_type(sbi, curseg); @@ -3059,45 +3318,85 @@ unlock: mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; } -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; + int err = 0; + + if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited) + goto allocate; if (!force && curseg->inited && !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) - return; + return 0; +allocate: old_segno = curseg->segno; - new_curseg(sbi, type, true); + err = new_curseg(sbi, type, true); + if (err) + return err; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); + return 0; } -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { + int ret; + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type, true, force); + ret = __allocate_new_segment(sbi, type, true, force); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return ret; } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) +{ + int err; + bool gc_required = true; + +retry: + f2fs_lock_op(sbi); + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + + if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { + f2fs_down_write(&sbi->gc_lock); + err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1, + true, ZONED_PIN_SEC_REQUIRED_COUNT); + f2fs_up_write(&sbi->gc_lock); + + gc_required = false; + if (!err) + goto retry; + } + + return err; +} + +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + int err = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i, false, false); + err += __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return err; } bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3168,7 +3467,7 @@ next: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + f2fs_schedule_timeout(DEFAULT_DISCARD_INTERVAL); goto next; } skip: @@ -3215,8 +3514,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); if (need_align) { - start_segno = rounddown(start_segno, sbi->segs_per_sec); - end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi)); + end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1; } cpc.reason = CP_DISCARD; @@ -3228,6 +3527,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; f2fs_down_write(&sbi->gc_lock); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); f2fs_up_write(&sbi->gc_lock); if (err) @@ -3257,8 +3557,14 @@ out: return err; } -int f2fs_rw_hint_to_seg_type(enum rw_hint hint) +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint) { + if (F2FS_OPTION(sbi).active_logs == 2) + return CURSEG_HOT_DATA; + else if (F2FS_OPTION(sbi).active_logs == 4) + return CURSEG_COLD_DATA; + + /* active_log == 6 */ switch (hint) { case WRITE_LIFE_SHORT: return CURSEG_HOT_DATA; @@ -3269,6 +3575,65 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint) } } +/* + * This returns write hints for each segment type. This hints will be + * passed down to block layer as below by default. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NONE|REQ_META + * HOT_NODE WRITE_LIFE_NONE + * WARM_NODE WRITE_LIFE_MEDIUM + * COLD_NODE WRITE_LIFE_LONG + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * COLD_DATA WRITE_LIFE_EXTREME + * HOT_DATA WRITE_LIFE_SHORT + * WARM_DATA WRITE_LIFE_NOT_SET + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + switch (type) { + case DATA: + switch (temp) { + case WARM: + return WRITE_LIFE_NOT_SET; + case HOT: + return WRITE_LIFE_SHORT; + case COLD: + return WRITE_LIFE_EXTREME; + default: + return WRITE_LIFE_NONE; + } + case NODE: + switch (temp) { + case WARM: + return WRITE_LIFE_MEDIUM; + case HOT: + return WRITE_LIFE_NONE; + case COLD: + return WRITE_LIFE_LONG; + default: + return WRITE_LIFE_NONE; + } + case META: + return WRITE_LIFE_NONE; + default: + return WRITE_LIFE_NONE; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -3280,14 +3645,14 @@ static int __get_segment_type_2(struct f2fs_io_info *fio) static int __get_segment_type_4(struct f2fs_io_info *fio) { if (fio->type == DATA) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(fio->page) && is_cold_node(fio->page)) + if (IS_DNODE(fio->folio) && is_cold_node(fio->folio)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; @@ -3314,7 +3679,7 @@ static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) @@ -3323,7 +3688,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && - (fio->sbi->gc_mode != GC_URGENT_HIGH)) + (fio->sbi->gc_mode != GC_URGENT_HIGH) && + __is_valid_data_blkaddr(fio->old_blkaddr) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; @@ -3331,26 +3698,54 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, fio->page->index); + type = __get_age_segment_type(inode, fio->folio->index); if (type != NO_CHECK_TYPE) return type; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || - f2fs_is_cow_file(inode)) + f2fs_is_cow_file(inode) || + is_inode_flag_set(inode, FI_NEED_IPU)) return CURSEG_HOT_DATA; - return f2fs_rw_hint_to_seg_type(inode->i_write_hint); + return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); } else { - if (IS_DNODE(fio->page)) - return is_cold_node(fio->page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->folio)) + return is_cold_node(fio->folio) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } } +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + enum temp_type temp = COLD; + + switch (curseg->seg_type) { + case CURSEG_HOT_NODE: + case CURSEG_HOT_DATA: + temp = HOT; + break; + case CURSEG_WARM_NODE: + case CURSEG_WARM_DATA: + temp = WARM; + break; + case CURSEG_COLD_NODE: + case CURSEG_COLD_DATA: + temp = COLD; + break; + default: + f2fs_bug_on(sbi, 1); + } + + return temp; +} + static int __get_segment_type(struct f2fs_io_info *fio) { - int type = 0; + enum log_type type = CURSEG_HOT_DATA; switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: @@ -3366,12 +3761,8 @@ static int __get_segment_type(struct f2fs_io_info *fio) f2fs_bug_on(fio->sbi, true); } - if (IS_HOT(type)) - fio->temp = HOT; - else if (IS_WARM(type)) - fio->temp = WARM; - else - fio->temp = COLD; + fio->temp = f2fs_get_segment_temp(fio->sbi, type); + return type; } @@ -3388,7 +3779,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3399,12 +3790,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; bool segment_full = false; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); + if (curseg->segno == NULL_SEGNO) { + ret = -ENOSPC; + goto out_err; + } + if (from_gc) { f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); @@ -3413,7 +3810,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); + f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi)); f2fs_wait_discard_bio(sbi, *new_blkaddr); @@ -3442,25 +3839,36 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * since SSR needs latest valid block information. */ update_sit_entry(sbi, *new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - update_sit_entry(sbi, old_blkaddr, -1); + update_sit_entry(sbi, old_blkaddr, -1); /* * If the current segment is full, flush it out and replace it with a * new segment. */ if (segment_full) { + if (type == CURSEG_COLD_DATA_PINNED && + !((curseg->segno + 1) % sbi->segs_per_sec)) { + write_sum_page(sbi, curseg->sum_blk, curseg->segno); + reset_curseg_fields(curseg); + goto skip_new_segment; + } + if (from_gc) { - get_atssr_segment(sbi, type, se->type, + ret = get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); } else { if (need_new_seg(sbi, type)) - new_curseg(sbi, type, false); + ret = new_curseg(sbi, type, false); else - change_curseg(sbi, type); + ret = change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } + + if (ret) + goto out_err; } + +skip_new_segment: /* * segment dirty status should be updated after segment allocation, * so we just need to update status only one time after previous @@ -3469,23 +3877,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - if (IS_DATASEG(type)) - atomic64_inc(&sbi->allocated_data_blocks); + if (IS_DATASEG(curseg->seg_type)) { + unsigned long long new_val; + + new_val = atomic64_inc_return(&sbi->allocated_data_blocks); + if (unlikely(new_val == ULLONG_MAX)) + atomic64_set(&sbi->allocated_data_blocks, 0); + } up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) { - fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (folio && IS_NODESEG(curseg->seg_type)) { + fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg)); - f2fs_inode_chksum_set(sbi, page); + f2fs_inode_chksum_set(sbi, folio); } if (fio) { struct f2fs_bio_info *io; - if (F2FS_IO_ALIGNED(sbi)) - fio->retry = 0; - INIT_LIST_HEAD(&fio->list); fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; @@ -3495,8 +3905,15 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + f2fs_up_read(&SM_I(sbi)->curseg_lock); + return 0; +out_err: + *new_blkaddr = NULL_ADDR; + up_write(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3526,36 +3943,74 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, } } +static int log_type_to_seg_type(enum log_type type) +{ + int seg_type = CURSEG_COLD_DATA; + + switch (type) { + case CURSEG_HOT_DATA: + case CURSEG_WARM_DATA: + case CURSEG_COLD_DATA: + case CURSEG_HOT_NODE: + case CURSEG_WARM_NODE: + case CURSEG_COLD_NODE: + seg_type = (int)type; + break; + case CURSEG_COLD_DATA_PINNED: + case CURSEG_ALL_DATA_ATGC: + seg_type = CURSEG_COLD_DATA; + break; + default: + break; + } + return seg_type; +} + static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio); - bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); + struct folio *folio = fio->folio; + enum log_type type = __get_segment_type(fio); + int seg_type = log_type_to_seg_type(type); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && + seg_type == CURSEG_COLD_DATA); + int err; if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); -reallocate: - f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + + err = f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); - if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) { - invalidate_mapping_pages(META_MAPPING(fio->sbi), - fio->old_blkaddr, fio->old_blkaddr); - f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr); + if (unlikely(err)) { + f2fs_err_ratelimited(fio->sbi, + "%s Failed to allocate data block, ino:%u, index:%lu, type:%d, old_blkaddr:0x%x, new_blkaddr:0x%x, err:%d", + __func__, fio->ino, folio->index, type, + fio->old_blkaddr, fio->new_blkaddr, err); + if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) + fscrypt_finalize_bounce_page(&fio->encrypted_page); + folio_end_writeback(folio); + if (f2fs_in_warm_node_list(fio->sbi, folio)) + f2fs_del_fsync_node_entry(fio->sbi, folio); + f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, + CP_ERROR_FLAG)); + goto out; } + f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi, + fio->new_blkaddr, DATA_GENERIC_ENHANCE)); + + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) + f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); + /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); - if (fio->retry) { - fio->old_blkaddr = fio->new_blkaddr; - goto reallocate; - } f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - +out: if (keep_order) f2fs_up_read(&fio->sbi->io_order_lock); } -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -3564,20 +4019,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, - .old_blkaddr = page->index, - .new_blkaddr = page->index, - .page = page, + .old_blkaddr = folio->index, + .new_blkaddr = folio->index, + .folio = folio, .encrypted_page = NULL, .in_list = 0, }; - if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_submit_page_write(&fio); - stat_inc_meta_count(sbi, page->index); + stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } @@ -3633,9 +4088,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - if (fio->post_read) - invalidate_mapping_pages(META_MAPPING(sbi), - fio->new_blkaddr, fio->new_blkaddr); + if (fio->meta_gc) + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -3646,7 +4100,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!err) { f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - f2fs_update_iostat(fio->sbi, fio->page->mapping->host, + f2fs_update_iostat(fio->sbi, fio_inode(fio), fio->io_type, F2FS_BLKSIZE); } @@ -3695,14 +4149,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg) { /* for recovery flow */ - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) { if (old_blkaddr == NULL_ADDR) type = CURSEG_COLD_DATA; else type = CURSEG_WARM_DATA; } } else { - if (IS_CURSEG(sbi, segno)) { + if (is_curseg(sbi, segno)) { /* se->type is volatile as SSR allocation */ type = __f2fs_get_curseg(sbi, segno); f2fs_bug_on(sbi, type == NO_CHECK_TYPE); @@ -3711,8 +4165,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } } - f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); + f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type)); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -3724,7 +4178,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -3736,9 +4191,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - invalidate_mapping_pages(META_MAPPING(sbi), - old_blkaddr, old_blkaddr); - f2fs_invalidate_compress_page(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); @@ -3752,12 +4205,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; } +out_unlock: up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_write(&SM_I(sbi)->curseg_lock); @@ -3778,21 +4233,21 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, f2fs_update_data_blkaddr(dn, new_addr); } -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered, bool locked) +void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, + bool ordered, bool locked) { - if (PageWriteback(page)) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + if (folio_test_writeback(folio)) { + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type); /* submit cached IPU IO */ - f2fs_submit_merged_ipu_write(sbi, NULL, page); + f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { - wait_on_page_writeback(page); - f2fs_bug_on(sbi, locked && PageWriteback(page)); + folio_wait_writeback(folio); + f2fs_bug_on(sbi, locked && folio_test_writeback(folio)); } else { - wait_for_stable_page(page); + folio_wait_stable(folio); } } } @@ -3800,18 +4255,18 @@ void f2fs_wait_on_page_writeback(struct page *page, void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *cpage; + struct folio *cfolio; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; if (!__is_valid_data_blkaddr(blkaddr)) return; - cpage = find_lock_page(META_MAPPING(sbi), blkaddr); - if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true, true); - f2fs_put_page(cpage, 1); + cfolio = filemap_lock_folio(META_MAPPING(sbi), blkaddr); + if (!IS_ERR(cfolio)) { + f2fs_folio_wait_writeback(cfolio, DATA, true, true); + f2fs_folio_put(cfolio, true); } } @@ -3821,13 +4276,13 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) @@ -3835,16 +4290,16 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; unsigned char *kaddr; - struct page *page; + struct folio *folio; block_t start; int i, j, offset; start = start_sum_block(sbi); - page = f2fs_get_meta_page(sbi, start++); - if (IS_ERR(page)) - return PTR_ERR(page); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_get_meta_folio(sbi, start++); + if (IS_ERR(folio)) + return PTR_ERR(folio); + kaddr = folio_address(folio); /* Step 1: restore nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -3869,7 +4324,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i->next_blkoff = blk_off; if (seg_i->alloc_type == SSR) - blk_off = sbi->blocks_per_seg; + blk_off = BLKS_PER_SEG(sbi); for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; @@ -3881,17 +4336,16 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) SUM_FOOTER_SIZE) continue; - f2fs_put_page(page, 1); - page = NULL; + f2fs_folio_put(folio, true); - page = f2fs_get_meta_page(sbi, start++); - if (IS_ERR(page)) - return PTR_ERR(page); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_get_meta_folio(sbi, start++); + if (IS_ERR(folio)) + return PTR_ERR(folio); + kaddr = folio_address(folio); offset = 0; } } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } @@ -3900,7 +4354,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_summary_block *sum; struct curseg_info *curseg; - struct page *new; + struct folio *new; unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; @@ -3927,17 +4381,17 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) blk_addr = GET_SUM_BLOCK(sbi, segno); } - new = f2fs_get_meta_page(sbi, blk_addr); + new = f2fs_get_meta_folio(sbi, blk_addr); if (IS_ERR(new)) return PTR_ERR(new); - sum = (struct f2fs_summary_block *)page_address(new); + sum = folio_address(new); if (IS_NODESEG(type)) { if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; - for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; } @@ -3965,7 +4419,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); out: - f2fs_put_page(new, 1); + f2fs_folio_put(new, true); return err; } @@ -4014,15 +4468,15 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct page *page; + struct folio *folio; unsigned char *kaddr; struct f2fs_summary *summary; struct curseg_info *seg_i; int written_size = 0; int i, j; - page = f2fs_grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_grab_meta_folio(sbi, blkaddr++); + kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ @@ -4039,9 +4493,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { seg_i = CURSEG_I(sbi, i); for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) { - if (!page) { - page = f2fs_grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); + if (!folio) { + folio = f2fs_grab_meta_folio(sbi, blkaddr++); + kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); written_size = 0; } @@ -4053,14 +4507,14 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) SUM_FOOTER_SIZE) continue; - set_page_dirty(page); - f2fs_put_page(page, 1); - page = NULL; + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); + folio = NULL; } } - if (page) { - set_page_dirty(page); - f2fs_put_page(page, 1); + if (folio) { + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } } @@ -4113,29 +4567,29 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, return -1; } -static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, +static struct folio *get_current_sit_folio(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_folio(sbi, current_sit_addr(sbi, segno)); } -static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, +static struct folio *get_next_sit_folio(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - struct page *page; + struct folio *folio; pgoff_t src_off, dst_off; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - page = f2fs_grab_meta_page(sbi, dst_off); - seg_info_to_sit_page(sbi, page, start); + folio = f2fs_grab_meta_folio(sbi, dst_off); + seg_info_to_sit_folio(sbi, folio, start); - set_page_dirty(page); + folio_mark_dirty(folio); set_to_next_sit(sit_i, start); - return page; + return folio; } static struct sit_entry_set *grab_sit_entry_set(void) @@ -4265,7 +4719,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -4279,8 +4733,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (to_journal) { down_write(&curseg->journal_rwsem); } else { - page = get_next_sit_page(sbi, start_segno); - raw_sit = page_address(page); + folio = get_next_sit_folio(sbi, start_segno); + raw_sit = folio_address(folio); } /* flush dirty sit entries in region of current sit set */ @@ -4318,6 +4772,12 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) &raw_sit->entries[sit_offset]); } + /* update ckpt_valid_block */ + if (__is_large_section(sbi)) { + set_ckpt_valid_blocks(sbi, segno); + sanity_check_valid_blocks(sbi, segno); + } + __clear_bit(segno, bitmap); sit_i->dirty_sentries--; ses->entry_cnt--; @@ -4326,7 +4786,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (to_journal) up_write(&curseg->journal_rwsem); else - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); f2fs_bug_on(sbi, ses->entry_cnt); release_sit_entry_set(ses); @@ -4443,7 +4903,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) #endif sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); - sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs); sit_i->written_valid_blocks = 0; sit_i->bitmap_size = sit_bitmap_size; sit_i->dirty_sentries = 0; @@ -4510,15 +4970,8 @@ static int build_curseg(struct f2fs_sb_info *sbi) sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; - if (i < NR_PERSISTENT_LOG) - array[i].seg_type = CURSEG_HOT_DATA + i; - else if (i == CURSEG_COLD_DATA_PINNED) - array[i].seg_type = CURSEG_COLD_DATA; - else if (i == CURSEG_ALL_DATA_ATGC) - array[i].seg_type = CURSEG_COLD_DATA; - array[i].segno = NULL_SEGNO; - array[i].next_blkoff = 0; - array[i].inited = false; + array[i].seg_type = log_type_to_seg_type(i); + reset_curseg_fields(&array[i]); } return restore_curseg_summaries(sbi); } @@ -4545,15 +4998,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) for (; start < end && start < MAIN_SEGS(sbi); start++) { struct f2fs_sit_block *sit_blk; - struct page *page; + struct folio *folio; se = &sit_i->sentries[start]; - page = get_current_sit_page(sbi, start); - if (IS_ERR(page)) - return PTR_ERR(page); - sit_blk = (struct f2fs_sit_block *)page_address(page); + folio = get_current_sit_folio(sbi, start); + if (IS_ERR(folio)) + return PTR_ERR(folio); + sit_blk = folio_address(folio); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); err = check_block_count(sbi, start, &sit); if (err) @@ -4570,21 +5023,20 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; - if (f2fs_block_unit_discard(sbi)) { - /* build discard map only one time */ - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, + if (!f2fs_block_unit_discard(sbi)) + goto init_discard_map_done; + + /* build discard map only one time */ + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, + goto init_discard_map_done; + } + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - + sbi->discard_blks += BLKS_PER_SEG(sbi) - se->valid_blocks; - } - } - +init_discard_map_done: if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; @@ -4647,6 +5099,16 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) } up_read(&curseg->journal_rwsem); + /* update ckpt_valid_block */ + if (__is_large_section(sbi)) { + unsigned int segno; + + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { + set_ckpt_valid_blocks(sbi, segno); + sanity_check_valid_blocks(sbi, segno); + } + } + if (err) return err; @@ -4724,13 +5186,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) return; mutex_lock(&dirty_i->seglist_lock); - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; - if (IS_CURSEC(sbi, secno)) + if (is_cursec(sbi, secno)) continue; set_bit(secno, dirty_i->dirty_secmap); } @@ -4823,7 +5285,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (curseg->alloc_type == SSR) continue; - for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) { if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: @@ -4839,100 +5301,82 @@ out: } #ifdef CONFIG_BLK_DEV_ZONED - static int check_zone_write_pointer(struct f2fs_sb_info *sbi, struct f2fs_dev_info *fdev, struct blk_zone *zone) { - unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno; - block_t zone_block, wp_block, last_valid_block; - int i, s, b, ret; - struct seg_entry *se; + unsigned int zone_segno; + block_t zone_block, valid_block_cnt; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; + int ret; + unsigned int nofs_flags; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block); - wp_segno = GET_SEGNO(sbi, wp_block); - wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - zone_block = fdev->start_blk + (zone->start >> - sbi->log_sectors_per_block); + zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); zone_segno = GET_SEGNO(sbi, zone_block); - zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); - - if (zone_segno >= MAIN_SEGS(sbi)) - return 0; /* * Skip check of zones cursegs point to, since * fix_curseg_write_pointer() checks them. */ - for (i = 0; i < NO_CHECK_TYPE; i++) - if (zone_secno == GET_SEC_FROM_SEG(sbi, - CURSEG_I(sbi, i)->segno)) - return 0; + if (zone_segno >= MAIN_SEGS(sbi)) + return 0; /* - * Get last valid block of the zone. + * Get # of valid block of the zone. */ - last_valid_block = zone_block - 1; - for (s = sbi->segs_per_sec - 1; s >= 0; s--) { - segno = zone_segno + s; - se = get_seg_entry(sbi, segno); - for (b = sbi->blocks_per_seg - 1; b >= 0; b--) - if (f2fs_test_bit(b, se->cur_valid_map)) { - last_valid_block = START_BLOCK(sbi, segno) + b; - break; - } - if (last_valid_block >= zone_block) - break; + valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); + if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, + blk_zone_cond_str(zone->cond)); + return 0; } - /* - * The write pointer matches with the valid blocks or - * already points to the end of the zone. - */ - if ((last_valid_block + 1 == wp_block) || - (zone->wp == zone->start + zone->len)) + if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) || + (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL)) return 0; - if (last_valid_block + 1 == zone_block) { - /* - * If there is no valid block in the zone and if write pointer - * is not at zone start, reset the write pointer. - */ - f2fs_notice(sbi, - "Zone without valid block has non-zero write " - "pointer. Reset the write pointer: wp[0x%x,0x%x]", - wp_segno, wp_blkoff); + if (!valid_block_cnt) { + f2fs_notice(sbi, "Zone without valid block has non-zero write " + "pointer. Reset the write pointer: cond[%s]", + blk_zone_cond_str(zone->cond)); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, - zone->len >> sbi->log_sectors_per_block); + zone->len >> log_sectors_per_block); if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); - return ret; } /* - * If there are valid blocks and the write pointer doesn't - * match with them, we need to report the inconsistency and - * fill the zone till the end to close the zone. This inconsistency - * does not cause write error because the zone will not be selected - * for write operation until it get discarded. + * If there are valid blocks and the write pointer doesn't match + * with them, we need to report the inconsistency and fill + * the zone till the end to close the zone. This inconsistency + * does not cause write error because the zone will not be + * selected for write operation until it get discarded. */ - f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: " - "valid block[0x%x,0x%x] wp[0x%x,0x%x]", - GET_SEGNO(sbi, last_valid_block), - GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), - wp_segno, wp_blkoff); - - ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, - zone->len - (zone->wp - zone->start), - GFP_NOFS, 0); - if (ret) - f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", - fdev->path, ret); + f2fs_notice(sbi, "Valid blocks are not aligned with write " + "pointer: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); + + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, + zone->start, zone->len); + memalloc_nofs_restore(nofs_flags); + if (ret == -EOPNOTSUPP) { + ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, + zone->len - (zone->wp - zone->start), + GFP_NOFS, 0); + if (ret) + f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", + fdev->path, ret); + } else if (ret) { + f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)", + fdev->path, ret); + } return ret; } @@ -4960,13 +5404,14 @@ static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, return 0; } -static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) +static int do_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) { struct curseg_info *cs = CURSEG_I(sbi, type); struct f2fs_dev_info *zbd; struct blk_zone zone; unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; block_t cs_zone_block, wp_block; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; sector_t zone_sector; int err; @@ -4978,8 +5423,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; /* report zone for the sector the curseg points to */ - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << - sbi->log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4991,23 +5436,36 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block); - wp_segno = GET_SEGNO(sbi, wp_block); - wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0); - - if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && - wp_sector_off == 0) - return 0; + /* + * When safely unmounted in the previous mount, we could use current + * segments. Otherwise, allocate new sections. + */ + if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); + wp_segno = GET_SEGNO(sbi, wp_block); + wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); + wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); + + if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && + wp_sector_off == 0) + return 0; - f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " - "curseg[0x%x,0x%x] wp[0x%x,0x%x]", - type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); + f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " + "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno, + cs->next_blkoff, wp_segno, wp_blkoff); + } - f2fs_notice(sbi, "Assign new section to curseg[%d]: " - "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); + /* Allocate a new section if it's not new. */ + if (cs->next_blkoff || + cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) { + unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff; - f2fs_allocate_new_section(sbi, type, true); + f2fs_allocate_new_section(sbi, type, true); + f2fs_notice(sbi, "Assign new section to curseg[%d]: " + "[0x%x,0x%x] -> [0x%x,0x%x]", + type, old_segno, old_blkoff, + cs->segno, cs->next_blkoff); + } /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) @@ -5021,8 +5479,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (!zbd) return 0; - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << - sbi->log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -5040,7 +5498,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, - zone.len >> sbi->log_sectors_per_block); + zone.len >> log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); @@ -5051,12 +5509,12 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; } -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) +static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; for (i = 0; i < NR_PERSISTENT_LOG; i++) { - ret = fix_curseg_write_pointer(sbi, i); + ret = do_fix_curseg_write_pointer(sbi, i); if (ret) return ret; } @@ -5079,7 +5537,7 @@ static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, return check_zone_write_pointer(args->sbi, args->fdev, zone); } -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +static int check_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; struct check_zone_write_pointer_args args; @@ -5099,6 +5557,21 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) return 0; } +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) +{ + int ret; + + if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) || + f2fs_hw_is_readonly(sbi)) + return 0; + + f2fs_notice(sbi, "Checking entire write pointers"); + ret = fix_curseg_write_pointer(sbi); + if (!ret) + ret = check_write_pointer(sbi); + return ret; +} + /* * Return the number of usable blocks in a segment. The number of blocks * returned is always equal to the number of blocks in a segment for @@ -5114,7 +5587,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( unsigned int secno; if (!sbi->unusable_blocks_per_sec) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); secno = GET_SEC_FROM_SEG(sbi, segno); seg_start = START_BLOCK(sbi, segno); @@ -5129,18 +5602,13 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( */ if (seg_start >= sec_cap_blkaddr) return 0; - if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) + if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr) return sec_cap_blkaddr - seg_start; - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } #else -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) -{ - return 0; -} - -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { return 0; } @@ -5158,16 +5626,50 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi)) return f2fs_usable_zone_blks_in_seg(sbi, segno); - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); - return sbi->segs_per_sec; + return SEGS_PER_SEC(sbi); +} + +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); + unsigned int secno = 0, start = 0; + unsigned int total_valid_blocks = 0; + unsigned long long mtime = 0; + unsigned int i = 0; + + secno = GET_SEC_FROM_SEG(sbi, segno); + start = GET_SEG_FROM_SEC(sbi, secno); + + if (!__is_large_section(sbi)) { + mtime = get_seg_entry(sbi, start + i)->mtime; + goto out; + } + + for (i = 0; i < usable_segs_per_sec; i++) { + /* for large section, only check the mtime of valid segments */ + struct seg_entry *se = get_seg_entry(sbi, start+i); + + mtime += se->mtime * se->valid_blocks; + total_valid_blocks += se->valid_blocks; + } + + if (total_valid_blocks == 0) + return INVALID_MTIME; + + mtime = div_u64(mtime, total_valid_blocks); +out: + if (unlikely(mtime == INVALID_MTIME)) + mtime -= 1; + return mtime; } /* @@ -5182,14 +5684,10 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = ULLONG_MAX; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - unsigned int i; + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { unsigned long long mtime = 0; - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, segno + i)->mtime; - - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = f2fs_get_section_mtime(sbi, segno); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; @@ -5228,7 +5726,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC); sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - sm_info->min_seq_blocks = sbi->blocks_per_seg; + sm_info->min_seq_blocks = BLKS_PER_SEG(sbi); sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); @@ -5357,9 +5855,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kvfree(sit_i->sit_bitmap); + kfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kvfree(sit_i->sit_bitmap_mir); + kfree(sit_i->sit_bitmap_mir); kvfree(sit_i->invalid_segmap); #endif kfree(sit_i); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2ca8fb5d0dc4..07dcbcbeb7c6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -18,6 +18,8 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ #define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */ +#define INVALID_MTIME ULLONG_MAX /* no valid blocks in a segment/section */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) @@ -32,38 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) -#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) -#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) - -#define IS_CURSEG(sbi, seg) \ - (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \ - ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno)) - -#define IS_CURSEC(sbi, secno) \ - (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - (sbi)->segs_per_sec) || \ - ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - (sbi)->segs_per_sec)) - #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) @@ -77,47 +47,55 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define TOTAL_SEGS(sbi) \ (SM_I(sbi) ? SM_I(sbi)->segment_count : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi))) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) #define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) + (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno)))) #define NEXT_FREE_BLKADDR(sbi, curseg) \ (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) + (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr))) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1)) #define GET_SEGNO(sbi, blk_addr) \ ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#ifdef CONFIG_BLK_DEV_ZONED #define CAP_BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ - (sbi)->unusable_blocks_per_sec) + (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec) #define CAP_SEGS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\ - (sbi)->log_blocks_per_seg)) + (SEGS_PER_SEC(sbi) - \ + BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec)) +#else +#define CAP_BLKS_PER_SEC(sbi) BLKS_PER_SEC(sbi) +#define CAP_SEGS_PER_SEC(sbi) SEGS_PER_SEC(sbi) +#endif +#define GET_START_SEG_FROM_SEC(sbi, segno) \ + (rounddown(segno, SEGS_PER_SEC(sbi))) #define GET_SEC_FROM_SEG(sbi, segno) \ - (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec) + (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi)) #define GET_SEG_FROM_SEC(sbi, secno) \ - ((secno) * (sbi)->segs_per_sec) + ((secno) * SEGS_PER_SEC(sbi)) #define GET_ZONE_FROM_SEC(sbi, secno) \ - (((secno) == -1) ? -1: (secno) / (sbi)->secs_per_zone) + (((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone) #define GET_ZONE_FROM_SEG(sbi, segno) \ GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) -#define GET_SUM_BLOCK(sbi, segno) \ - ((sbi)->sm_info->ssa_blkaddr + (segno)) +#define SUMS_PER_BLOCK (F2FS_BLKSIZE / F2FS_SUM_BLKSIZE) +#define GET_SUM_BLOCK(sbi, segno) \ + (SM_I(sbi)->ssa_blkaddr + (segno / SUMS_PER_BLOCK)) +#define GET_SUM_BLKOFF(segno) (segno % SUMS_PER_BLOCK) +#define SUM_BLK_PAGE_ADDR(folio, segno) \ + (folio_address(folio) + GET_SUM_BLKOFF(segno) * F2FS_SUM_BLKSIZE) #define GET_SUM_TYPE(footer) ((footer)->entry_type) #define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) @@ -139,16 +117,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) /* - * indicate a block allocation direction: RIGHT and LEFT. - * RIGHT means allocating new sections towards the end of volume. - * LEFT means the opposite direction. - */ -enum { - ALLOC_RIGHT = 0, - ALLOC_LEFT -}; - -/* * In the victim_sel_policy->alloc_mode, there are three block allocation modes. * LFS writes data sequentially with cleaning operations. * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. @@ -201,6 +169,7 @@ struct victim_sel_policy { unsigned int min_segno; /* segment # having min. cost */ unsigned long long age; /* mtime of GCed section*/ unsigned long long age_threshold;/* age threshold */ + bool one_time_gc; /* one time GC */ }; struct seg_entry { @@ -223,6 +192,7 @@ struct seg_entry { struct sec_entry { unsigned int valid_blocks; /* # of valid blocks in a section */ + unsigned int ckpt_valid_blocks; /* # of valid blocks last cp in a section */ }; #define MAX_SKIP_GC_COUNT 16 @@ -329,6 +299,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } +static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (segno == CURSEG_I(sbi, i)->segno) + return true; + } + return false; +} + +static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno)) + return true; + } + return false; +} + static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, unsigned int segno) { @@ -359,21 +351,57 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { - if (use_section && __is_large_section(sbi)) { - unsigned int start_segno = START_SEGNO(segno); - unsigned int blocks = 0; - int i; + if (use_section && __is_large_section(sbi)) + return get_sec_entry(sbi, segno)->ckpt_valid_blocks; + else + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} - for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) { - struct seg_entry *se = get_seg_entry(sbi, start_segno); +static inline void set_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); + unsigned int blocks = 0; + int i; - blocks += se->ckpt_valid_blocks; - } - return blocks; + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); + + blocks += se->ckpt_valid_blocks; } - return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + get_sec_entry(sbi, segno)->ckpt_valid_blocks = blocks; } +#ifdef CONFIG_F2FS_CHECK_FS +static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); + unsigned int blocks = 0; + int i; + + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); + + blocks += se->ckpt_valid_blocks; + } + + if (blocks != get_sec_entry(sbi, segno)->ckpt_valid_blocks) { + f2fs_err(sbi, + "Inconsistent ckpt valid blocks: " + "seg entry(%d) vs sec entry(%d) at secno %d", + blocks, get_sec_entry(sbi, segno)->ckpt_valid_blocks, secno); + f2fs_bug_on(sbi, 1); + } +} +#else +static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ +} +#endif static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -398,8 +426,8 @@ static inline void __seg_info_to_raw_sit(struct seg_entry *se, rs->mtime = cpu_to_le64(se->mtime); } -static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, - struct page *page, unsigned int start) +static inline void seg_info_to_sit_folio(struct f2fs_sb_info *sbi, + struct folio *folio, unsigned int start) { struct f2fs_sit_block *raw_sit; struct seg_entry *se; @@ -408,7 +436,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, (unsigned long)MAIN_SEGS(sbi)); int i; - raw_sit = (struct f2fs_sit_block *)page_address(page); + raw_sit = folio_address(folio); memset(raw_sit, 0, PAGE_SIZE); for (i = 0; i < end - start; i++) { rs = &raw_sit->entries[i]; @@ -442,15 +470,14 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); - if (next >= start_segno + usable_segs) { + start_segno + SEGS_PER_SEC(sbi), start_segno); + if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } @@ -476,22 +503,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + bool ret; spin_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; - - if (!inmem && IS_CURSEC(sbi, secno)) - goto skip_free; - next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); - if (next >= start_segno + usable_segs) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } -skip_free: + ret = test_and_clear_bit(segno, free_i->free_segmap); + if (!ret) + goto unlock_out; + + free_i->free_segments++; + + if (!inmem && is_cursec(sbi, secno)) + goto unlock_out; + + /* check large section */ + next = find_next_bit(free_i->free_segmap, + start_segno + SEGS_PER_SEC(sbi), start_segno); + if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) + goto unlock_out; + + ret = test_and_clear_bit(secno, free_i->free_secmap); + if (!ret) + goto unlock_out; + + free_i->free_sections++; + + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + +unlock_out: spin_unlock(&free_i->segmap_lock); } @@ -535,8 +576,7 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments + - SM_I(sbi)->additional_reserved_segments; + return SM_I(sbi)->reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) @@ -569,35 +609,56 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi)); } -static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, - unsigned int node_blocks, unsigned int dent_blocks) +static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi, + enum log_type type, unsigned int segno) { + if (f2fs_lfs_mode(sbi)) { + unsigned int used_blocks = __is_large_section(sbi) ? SEGS_TO_BLKS(sbi, + (segno - GET_START_SEG_FROM_SEC(sbi, segno))) : 0; + return CAP_BLKS_PER_SEC(sbi) - used_blocks - + CURSEG_I(sbi, type)->next_blkoff; + } + return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true); +} - unsigned int segno, left_blocks; +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, + unsigned int node_blocks, unsigned int data_blocks, + unsigned int dent_blocks) +{ + unsigned int segno, left_blocks, blocks; int i; - /* check current node segment */ - for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + /* check current data/node sections in the worst case. */ + for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; - if (node_blocks > left_blocks) + if (unlikely(segno == NULL_SEGNO)) + return false; + + left_blocks = get_left_section_blocks(sbi, i, segno); + + blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks; + if (blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + if (unlikely(segno == NULL_SEGNO)) + return false; + + left_blocks = get_left_section_blocks(sbi, CURSEG_HOT_DATA, segno); + if (dent_blocks > left_blocks) return false; return true; } /* - * calculate needed sections for dirty node/dentry - * and call has_curseg_enough_space + * calculate needed sections for dirty node/dentry and call + * has_curseg_enough_space, please note that, it needs to account + * dirty data as well in lfs mode when checkpoint is disabled. */ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) @@ -606,19 +667,29 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int total_data_blocks = 0; unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int data_secs = 0; unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); + unsigned int data_blocks = 0; + + if (f2fs_lfs_mode(sbi)) { + total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); + data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); + data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); + } if (lower_p) - *lower_p = node_secs + dent_secs; + *lower_p = node_secs + dent_secs + data_secs; if (upper_p) - *upper_p = node_secs + dent_secs + - (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + *upper_p = node_secs + dent_secs + data_secs + + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + + (data_blocks ? 1 : 0); if (curseg_p) *curseg_p = has_curseg_enough_space(sbi, - node_blocks, dent_blocks); + node_blocks, data_blocks, dent_blocks); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, @@ -638,7 +709,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } @@ -649,12 +720,30 @@ static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, return !has_not_enough_free_secs(sbi, freed, needed); } +static inline bool has_enough_free_blks(struct f2fs_sb_info *sbi) +{ + unsigned int total_free_blocks = 0; + unsigned int avail_user_block_count; + + spin_lock(&sbi->stat_lock); + + avail_user_block_count = get_available_block_count(sbi, NULL, true); + total_free_blocks = avail_user_block_count - (unsigned int)valid_user_blocks(sbi); + + spin_unlock(&sbi->stat_lock); + + return total_free_blocks > 0; +} + static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (likely(has_enough_free_secs(sbi, 0, 0))) return true; + if (!f2fs_lfs_mode(sbi) && + likely(has_enough_free_blks(sbi))) + return true; return false; } @@ -793,10 +882,10 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - if (usable_blks_per_seg < sbi->blocks_per_seg) + if (usable_blks_per_seg < BLKS_PER_SEG(sbi)) f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map, - sbi->blocks_per_seg, - usable_blks_per_seg) != sbi->blocks_per_seg); + BLKS_PER_SEG(sbi), + usable_blks_per_seg) != BLKS_PER_SEG(sbi)); /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg @@ -897,7 +986,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { - if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno)) return true; return false; } @@ -915,9 +1004,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) return 0; if (type == DATA) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (type == NODE) - return 8 * sbi->blocks_per_seg; + return SEGS_TO_BLKS(sbi, 8); else if (type == META) return 8 * BIO_MAX_VECS; else diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 83d6fb97dcae..b88babcf6ab4 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -73,7 +73,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, mutex_unlock(&sbi->umount_mutex); } spin_unlock(&f2fs_list_lock); - return count; + return count ?: SHRINK_EMPTY; } unsigned long f2fs_shrink_scan(struct shrinker *shrink, @@ -130,6 +130,103 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, return freed; } +unsigned int f2fs_donate_files(void) +{ + struct f2fs_sb_info *sbi; + struct list_head *p; + unsigned int donate_files = 0; + + spin_lock(&f2fs_list_lock); + p = f2fs_list.next; + while (p != &f2fs_list) { + sbi = list_entry(p, struct f2fs_sb_info, s_list); + + /* stop f2fs_put_super */ + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + spin_unlock(&f2fs_list_lock); + + donate_files += sbi->donate_files; + + spin_lock(&f2fs_list_lock); + p = p->next; + mutex_unlock(&sbi->umount_mutex); + } + spin_unlock(&f2fs_list_lock); + + return donate_files; +} + +static unsigned int do_reclaim_caches(struct f2fs_sb_info *sbi, + unsigned int reclaim_caches_kb) +{ + struct inode *inode; + struct f2fs_inode_info *fi; + unsigned int nfiles = sbi->donate_files; + pgoff_t npages = reclaim_caches_kb >> (PAGE_SHIFT - 10); + + while (npages && nfiles--) { + pgoff_t len; + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + if (list_empty(&sbi->inode_list[DONATE_INODE])) { + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + break; + } + fi = list_first_entry(&sbi->inode_list[DONATE_INODE], + struct f2fs_inode_info, gdonate_list); + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + + if (!inode) + continue; + + inode_lock(inode); + if (!is_inode_flag_set(inode, FI_DONATE_FINISHED)) { + len = fi->donate_end - fi->donate_start + 1; + npages = npages < len ? 0 : npages - len; + + invalidate_inode_pages2_range(inode->i_mapping, + fi->donate_start, fi->donate_end); + set_inode_flag(inode, FI_DONATE_FINISHED); + } + inode_unlock(inode); + + iput(inode); + cond_resched(); + } + return npages << (PAGE_SHIFT - 10); +} + +void f2fs_reclaim_caches(unsigned int reclaim_caches_kb) +{ + struct f2fs_sb_info *sbi; + struct list_head *p; + + spin_lock(&f2fs_list_lock); + p = f2fs_list.next; + while (p != &f2fs_list && reclaim_caches_kb) { + sbi = list_entry(p, struct f2fs_sb_info, s_list); + + /* stop f2fs_put_super */ + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + spin_unlock(&f2fs_list_lock); + + reclaim_caches_kb = do_reclaim_caches(sbi, reclaim_caches_kb); + + spin_lock(&f2fs_list_lock); + p = p->next; + mutex_unlock(&sbi->umount_mutex); + } + spin_unlock(&f2fs_list_lock); +} + void f2fs_join_shrinker(struct f2fs_sb_info *sbi) { spin_lock(&f2fs_list_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index aa1f9a3a8037..c4c225e09dc4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -11,7 +11,6 @@ #include <linux/fs_context.h> #include <linux/sched/mm.h> #include <linux/statfs.h> -#include <linux/buffer_head.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/mount.h> @@ -28,6 +27,8 @@ #include <linux/part_stat.h> #include <linux/zstd.h> #include <linux/lz4.h> +#include <linux/ctype.h> +#include <linux/fs_parser.h> #include "f2fs.h" #include "node.h" @@ -44,103 +45,118 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION const char *f2fs_fault_name[FAULT_MAX] = { - [FAULT_KMALLOC] = "kmalloc", - [FAULT_KVMALLOC] = "kvmalloc", - [FAULT_PAGE_ALLOC] = "page alloc", - [FAULT_PAGE_GET] = "page get", - [FAULT_ALLOC_NID] = "alloc nid", - [FAULT_ORPHAN] = "orphan", - [FAULT_BLOCK] = "no more block", - [FAULT_DIR_DEPTH] = "too big dir depth", - [FAULT_EVICT_INODE] = "evict_inode fail", - [FAULT_TRUNCATE] = "truncate fail", - [FAULT_READ_IO] = "read IO error", - [FAULT_CHECKPOINT] = "checkpoint error", - [FAULT_DISCARD] = "discard error", - [FAULT_WRITE_IO] = "write IO error", - [FAULT_SLAB_ALLOC] = "slab alloc", - [FAULT_DQUOT_INIT] = "dquot initialize", - [FAULT_LOCK_OP] = "lock_op", - [FAULT_BLKADDR] = "invalid blkaddr", + [FAULT_KMALLOC] = "kmalloc", + [FAULT_KVMALLOC] = "kvmalloc", + [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio(obsolete)", + [FAULT_ALLOC_NID] = "alloc nid", + [FAULT_ORPHAN] = "orphan", + [FAULT_BLOCK] = "no more block", + [FAULT_DIR_DEPTH] = "too big dir depth", + [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", + [FAULT_READ_IO] = "read IO error", + [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", + [FAULT_SLAB_ALLOC] = "slab alloc", + [FAULT_DQUOT_INIT] = "dquot initialize", + [FAULT_LOCK_OP] = "lock_op", + [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr", + [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr", + [FAULT_NO_SEGMENT] = "no free segment", + [FAULT_INCONSISTENT_FOOTER] = "inconsistent footer", + [FAULT_TIMEOUT] = "timeout", + [FAULT_VMALLOC] = "vmalloc", }; -void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type) +int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type, enum fault_option fo) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; - if (rate) { + if (fo & FAULT_ALL) { + memset(ffi, 0, sizeof(struct f2fs_fault_info)); + return 0; + } + + if (fo & FAULT_RATE) { + if (rate > INT_MAX) + return -EINVAL; atomic_set(&ffi->inject_ops, 0); - ffi->inject_rate = rate; + ffi->inject_rate = (int)rate; + f2fs_info(sbi, "build fault injection rate: %lu", rate); } - if (type) - ffi->inject_type = type; + if (fo & FAULT_TYPE) { + if (type >= BIT(FAULT_MAX)) + return -EINVAL; + ffi->inject_type = (unsigned int)type; + f2fs_info(sbi, "build fault injection type: 0x%lx", type); + } - if (!rate && !type) - memset(ffi, 0, sizeof(struct f2fs_fault_info)); + return 0; } #endif /* f2fs-wide shrinker description */ -static struct shrinker f2fs_shrinker_info = { - .scan_objects = f2fs_shrink_scan, - .count_objects = f2fs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *f2fs_shrinker_info; + +static int __init f2fs_init_shrinker(void) +{ + f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker"); + if (!f2fs_shrinker_info) + return -ENOMEM; + + f2fs_shrinker_info->count_objects = f2fs_shrink_count; + f2fs_shrinker_info->scan_objects = f2fs_shrink_scan; + + shrinker_register(f2fs_shrinker_info); + + return 0; +} + +static void f2fs_exit_shrinker(void) +{ + shrinker_free(f2fs_shrinker_info); +} enum { Opt_gc_background, Opt_disable_roll_forward, Opt_norecovery, Opt_discard, - Opt_nodiscard, Opt_noheap, Opt_heap, Opt_user_xattr, - Opt_nouser_xattr, Opt_acl, - Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, - Opt_noinline_xattr, Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, - Opt_noinline_dentry, Opt_flush_merge, - Opt_noflush_merge, Opt_barrier, - Opt_nobarrier, Opt_fastboot, Opt_extent_cache, - Opt_noextent_cache, - Opt_noinline_data, Opt_data_flush, Opt_reserve_root, + Opt_reserve_node, Opt_resgid, Opt_resuid, Opt_mode, - Opt_io_size_bits, Opt_fault_injection, Opt_fault_type, Opt_lazytime, - Opt_nolazytime, Opt_quota, - Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_usrjquota, Opt_grpjquota, Opt_prjjquota, - Opt_offusrjquota, - Opt_offgrpjquota, - Opt_offprjjquota, - Opt_jqfmt_vfsold, - Opt_jqfmt_vfsv0, - Opt_jqfmt_vfsv1, Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, @@ -150,105 +166,221 @@ enum { Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, Opt_checkpoint_merge, - Opt_nocheckpoint_merge, Opt_compress_algorithm, Opt_compress_log_size, - Opt_compress_extension, Opt_nocompress_extension, + Opt_compress_extension, Opt_compress_chksum, Opt_compress_mode, Opt_compress_cache, Opt_atgc, Opt_gc_merge, - Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, Opt_age_extent_cache, Opt_errors, + Opt_nat_bits, + Opt_jqfmt, + Opt_checkpoint, + Opt_lookup_mode, Opt_err, }; -static match_table_t f2fs_tokens = { - {Opt_gc_background, "background_gc=%s"}, - {Opt_disable_roll_forward, "disable_roll_forward"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_noheap, "no_heap"}, - {Opt_heap, "heap"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_active_logs, "active_logs=%u"}, - {Opt_disable_ext_identify, "disable_ext_identify"}, - {Opt_inline_xattr, "inline_xattr"}, - {Opt_noinline_xattr, "noinline_xattr"}, - {Opt_inline_xattr_size, "inline_xattr_size=%u"}, - {Opt_inline_data, "inline_data"}, - {Opt_inline_dentry, "inline_dentry"}, - {Opt_noinline_dentry, "noinline_dentry"}, - {Opt_flush_merge, "flush_merge"}, - {Opt_noflush_merge, "noflush_merge"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_fastboot, "fastboot"}, - {Opt_extent_cache, "extent_cache"}, - {Opt_noextent_cache, "noextent_cache"}, - {Opt_noinline_data, "noinline_data"}, - {Opt_data_flush, "data_flush"}, - {Opt_reserve_root, "reserve_root=%u"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_mode, "mode=%s"}, - {Opt_io_size_bits, "io_bits=%u"}, - {Opt_fault_injection, "fault_injection=%u"}, - {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_prjquota, "prjquota"}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_prjjquota, "prjjquota=%s"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_offprjjquota, "prjjquota="}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_alloc, "alloc_mode=%s"}, - {Opt_fsync, "fsync_mode=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_checkpoint_disable, "checkpoint=disable"}, - {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, - {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, - {Opt_checkpoint_enable, "checkpoint=enable"}, - {Opt_checkpoint_merge, "checkpoint_merge"}, - {Opt_nocheckpoint_merge, "nocheckpoint_merge"}, - {Opt_compress_algorithm, "compress_algorithm=%s"}, - {Opt_compress_log_size, "compress_log_size=%u"}, - {Opt_compress_extension, "compress_extension=%s"}, - {Opt_nocompress_extension, "nocompress_extension=%s"}, - {Opt_compress_chksum, "compress_chksum"}, - {Opt_compress_mode, "compress_mode=%s"}, - {Opt_compress_cache, "compress_cache"}, - {Opt_atgc, "atgc"}, - {Opt_gc_merge, "gc_merge"}, - {Opt_nogc_merge, "nogc_merge"}, - {Opt_discard_unit, "discard_unit=%s"}, - {Opt_memory_mode, "memory=%s"}, - {Opt_age_extent_cache, "age_extent_cache"}, - {Opt_errors, "errors=%s"}, +static const struct constant_table f2fs_param_background_gc[] = { + {"on", BGGC_MODE_ON}, + {"off", BGGC_MODE_OFF}, + {"sync", BGGC_MODE_SYNC}, + {} +}; + +static const struct constant_table f2fs_param_mode[] = { + {"adaptive", FS_MODE_ADAPTIVE}, + {"lfs", FS_MODE_LFS}, + {"fragment:segment", FS_MODE_FRAGMENT_SEG}, + {"fragment:block", FS_MODE_FRAGMENT_BLK}, + {} +}; + +static const struct constant_table f2fs_param_jqfmt[] = { + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, + {} +}; + +static const struct constant_table f2fs_param_alloc_mode[] = { + {"default", ALLOC_MODE_DEFAULT}, + {"reuse", ALLOC_MODE_REUSE}, + {} +}; +static const struct constant_table f2fs_param_fsync_mode[] = { + {"posix", FSYNC_MODE_POSIX}, + {"strict", FSYNC_MODE_STRICT}, + {"nobarrier", FSYNC_MODE_NOBARRIER}, + {} +}; + +static const struct constant_table f2fs_param_compress_mode[] = { + {"fs", COMPR_MODE_FS}, + {"user", COMPR_MODE_USER}, + {} +}; + +static const struct constant_table f2fs_param_discard_unit[] = { + {"block", DISCARD_UNIT_BLOCK}, + {"segment", DISCARD_UNIT_SEGMENT}, + {"section", DISCARD_UNIT_SECTION}, + {} +}; + +static const struct constant_table f2fs_param_memory_mode[] = { + {"normal", MEMORY_MODE_NORMAL}, + {"low", MEMORY_MODE_LOW}, + {} +}; + +static const struct constant_table f2fs_param_errors[] = { + {"remount-ro", MOUNT_ERRORS_READONLY}, + {"continue", MOUNT_ERRORS_CONTINUE}, + {"panic", MOUNT_ERRORS_PANIC}, + {} +}; + +static const struct constant_table f2fs_param_lookup_mode[] = { + {"perf", LOOKUP_PERF}, + {"compat", LOOKUP_COMPAT}, + {"auto", LOOKUP_AUTO}, + {} +}; + +static const struct fs_parameter_spec f2fs_param_specs[] = { + fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc), + fsparam_flag("disable_roll_forward", Opt_disable_roll_forward), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag_no("discard", Opt_discard), + fsparam_flag("no_heap", Opt_noheap), + fsparam_flag("heap", Opt_heap), + fsparam_flag_no("user_xattr", Opt_user_xattr), + fsparam_flag_no("acl", Opt_acl), + fsparam_s32("active_logs", Opt_active_logs), + fsparam_flag("disable_ext_identify", Opt_disable_ext_identify), + fsparam_flag_no("inline_xattr", Opt_inline_xattr), + fsparam_s32("inline_xattr_size", Opt_inline_xattr_size), + fsparam_flag_no("inline_data", Opt_inline_data), + fsparam_flag_no("inline_dentry", Opt_inline_dentry), + fsparam_flag_no("flush_merge", Opt_flush_merge), + fsparam_flag_no("barrier", Opt_barrier), + fsparam_flag("fastboot", Opt_fastboot), + fsparam_flag_no("extent_cache", Opt_extent_cache), + fsparam_flag("data_flush", Opt_data_flush), + fsparam_u32("reserve_root", Opt_reserve_root), + fsparam_u32("reserve_node", Opt_reserve_node), + fsparam_gid("resgid", Opt_resgid), + fsparam_uid("resuid", Opt_resuid), + fsparam_enum("mode", Opt_mode, f2fs_param_mode), + fsparam_s32("fault_injection", Opt_fault_injection), + fsparam_u32("fault_type", Opt_fault_type), + fsparam_flag_no("lazytime", Opt_lazytime), + fsparam_flag_no("quota", Opt_quota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_string_empty("usrjquota", Opt_usrjquota), + fsparam_string_empty("grpjquota", Opt_grpjquota), + fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_flag("nat_bits", Opt_nat_bits), + fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), + fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), + fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode), + fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption), + fsparam_flag("inlinecrypt", Opt_inlinecrypt), + fsparam_string("checkpoint", Opt_checkpoint), + fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge), + fsparam_string("compress_algorithm", Opt_compress_algorithm), + fsparam_u32("compress_log_size", Opt_compress_log_size), + fsparam_string("compress_extension", Opt_compress_extension), + fsparam_string("nocompress_extension", Opt_nocompress_extension), + fsparam_flag("compress_chksum", Opt_compress_chksum), + fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode), + fsparam_flag("compress_cache", Opt_compress_cache), + fsparam_flag("atgc", Opt_atgc), + fsparam_flag_no("gc_merge", Opt_gc_merge), + fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit), + fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode), + fsparam_flag("age_extent_cache", Opt_age_extent_cache), + fsparam_enum("errors", Opt_errors, f2fs_param_errors), + fsparam_enum("lookup_mode", Opt_lookup_mode, f2fs_param_lookup_mode), + {} +}; + +/* Resort to a match_table for this interestingly formatted option */ +static match_table_t f2fs_checkpoint_tokens = { + {Opt_checkpoint_disable, "disable"}, + {Opt_checkpoint_disable_cap, "disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "disable:%u%%"}, + {Opt_checkpoint_enable, "enable"}, {Opt_err, NULL}, }; -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) +#define F2FS_SPEC_background_gc (1 << 0) +#define F2FS_SPEC_inline_xattr_size (1 << 1) +#define F2FS_SPEC_active_logs (1 << 2) +#define F2FS_SPEC_reserve_root (1 << 3) +#define F2FS_SPEC_resgid (1 << 4) +#define F2FS_SPEC_resuid (1 << 5) +#define F2FS_SPEC_mode (1 << 6) +#define F2FS_SPEC_fault_injection (1 << 7) +#define F2FS_SPEC_fault_type (1 << 8) +#define F2FS_SPEC_jqfmt (1 << 9) +#define F2FS_SPEC_alloc_mode (1 << 10) +#define F2FS_SPEC_fsync_mode (1 << 11) +#define F2FS_SPEC_checkpoint_disable_cap (1 << 12) +#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13) +#define F2FS_SPEC_compress_level (1 << 14) +#define F2FS_SPEC_compress_algorithm (1 << 15) +#define F2FS_SPEC_compress_log_size (1 << 16) +#define F2FS_SPEC_compress_extension (1 << 17) +#define F2FS_SPEC_nocompress_extension (1 << 18) +#define F2FS_SPEC_compress_chksum (1 << 19) +#define F2FS_SPEC_compress_mode (1 << 20) +#define F2FS_SPEC_discard_unit (1 << 21) +#define F2FS_SPEC_memory_mode (1 << 22) +#define F2FS_SPEC_errors (1 << 23) +#define F2FS_SPEC_lookup_mode (1 << 24) +#define F2FS_SPEC_reserve_node (1 << 25) + +struct f2fs_fs_context { + struct f2fs_mount_info info; + unsigned long long opt_mask; /* Bits changed */ + unsigned int spec_mask; + unsigned short qname_mask; +}; + +#define F2FS_CTX_INFO(ctx) ((ctx)->info) + +static inline void ctx_set_opt(struct f2fs_fs_context *ctx, + enum f2fs_mount_opt flag) +{ + ctx->info.opt |= BIT(flag); + ctx->opt_mask |= BIT(flag); +} + +static inline void ctx_clear_opt(struct f2fs_fs_context *ctx, + enum f2fs_mount_opt flag) +{ + ctx->info.opt &= ~BIT(flag); + ctx->opt_mask |= BIT(flag); +} + +static inline bool ctx_test_opt(struct f2fs_fs_context *ctx, + enum f2fs_mount_opt flag) +{ + return ctx->info.opt & BIT(flag); +} + +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -259,8 +391,20 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (limit_rate) + if (sbi) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk_ratelimited("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); + else + if (sbi) + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs: %pV\n", + KERN_SOH_ASCII, level, &vaf); va_end(args); } @@ -291,7 +435,7 @@ struct kmem_cache *f2fs_cf_name_slab; static int __init f2fs_create_casefold_cache(void) { f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name", - F2FS_NAME_LEN); + F2FS_NAME_LEN); return f2fs_cf_name_slab ? 0 : -ENOMEM; } @@ -306,68 +450,36 @@ static void f2fs_destroy_casefold_cache(void) { } static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = min((sbi->user_block_count >> 3), + block_t block_limit = min((sbi->user_block_count >> 3), sbi->user_block_count - sbi->reserved_blocks); + block_t node_limit = sbi->total_node_count >> 3; /* limit is 12.5% */ if (test_opt(sbi, RESERVE_ROOT) && - F2FS_OPTION(sbi).root_reserved_blocks > limit) { - F2FS_OPTION(sbi).root_reserved_blocks = limit; + F2FS_OPTION(sbi).root_reserved_blocks > block_limit) { + F2FS_OPTION(sbi).root_reserved_blocks = block_limit; f2fs_info(sbi, "Reduce reserved blocks for root = %u", F2FS_OPTION(sbi).root_reserved_blocks); } - if (!test_opt(sbi, RESERVE_ROOT) && + if (test_opt(sbi, RESERVE_NODE) && + F2FS_OPTION(sbi).root_reserved_nodes > node_limit) { + F2FS_OPTION(sbi).root_reserved_nodes = node_limit; + f2fs_info(sbi, "Reduce reserved nodes for root = %u", + F2FS_OPTION(sbi).root_reserved_nodes); + } + if (!test_opt(sbi, RESERVE_ROOT) && !test_opt(sbi, RESERVE_NODE) && (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) - f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root" + " and reserve_node", from_kuid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resuid), from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); } -static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) -{ - unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; - unsigned int avg_vblocks; - unsigned int wanted_reserved_segments; - block_t avail_user_block_count; - - if (!F2FS_IO_ALIGNED(sbi)) - return 0; - - /* average valid block count in section in worst case */ - avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); - - /* - * we need enough free space when migrating one section in worst case - */ - wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * - reserved_segments(sbi); - wanted_reserved_segments -= reserved_segments(sbi); - - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks - - F2FS_OPTION(sbi).root_reserved_blocks; - - if (wanted_reserved_segments * sbi->blocks_per_seg > - avail_user_block_count) { - f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", - wanted_reserved_segments, - avail_user_block_count >> sbi->log_blocks_per_seg); - return -ENOSPC; - } - - SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; - - f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", - wanted_reserved_segments); - - return 0; -} - static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -389,164 +501,123 @@ static void init_once(void *foo) struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; inode_init_once(&fi->vfs_inode); +#ifdef CONFIG_FS_ENCRYPTION + fi->i_crypt_info = NULL; +#endif +#ifdef CONFIG_FS_VERITY + fi->i_verity_info = NULL; +#endif } #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) -static int f2fs_set_qf_name(struct super_block *sb, int qtype, - substring_t *args) +/* + * Note the name of the specified quota file. + */ +static int f2fs_note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_fs_context *ctx = fc->fs_private; char *qname; - int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + if (param->size < 1) { + f2fs_err(NULL, "Missing quota name"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + if (strchr(param->string, '/')) { + f2fs_err(NULL, "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->info.s_qf_names[qtype]) { + if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) { + f2fs_err(NULL, "Quota file already specified"); + return -EINVAL; + } return 0; } - qname = match_strdup(args); + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { - f2fs_err(sbi, "Not enough memory for storing quotafile name"); + f2fs_err(NULL, "Not enough memory for storing quotafile name"); return -ENOMEM; } - if (F2FS_OPTION(sbi).s_qf_names[qtype]) { - if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) - ret = 0; - else - f2fs_err(sbi, "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - f2fs_err(sbi, "quotafile must be on filesystem root"); - goto errout; - } - F2FS_OPTION(sbi).s_qf_names[qtype] = qname; - set_opt(sbi, QUOTA); + F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname; + ctx->qname_mask |= 1 << qtype; return 0; -errout: - kfree(qname); - return ret; } -static int f2fs_clear_qf_name(struct super_block *sb, int qtype) +/* + * Clear the name of the specified quota file. + */ +static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_fs_context *ctx = fc->fs_private; - if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); - return -EINVAL; - } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); - F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; + kfree(ctx->info.s_qf_names[qtype]); + ctx->info.s_qf_names[qtype] = NULL; + ctx->qname_mask |= 1 << qtype; return 0; } -static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) +static void f2fs_unnote_qf_name_all(struct fs_context *fc) { - /* - * We do the test below only for project quotas. 'usrquota' and - * 'grpquota' mount options are allowed even without quota feature - * to support legacy quotas in quota files. - */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); - return -1; - } - if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && - F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) - clear_opt(sbi, USRQUOTA); - - if (test_opt(sbi, GRPQUOTA) && - F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) - clear_opt(sbi, GRPQUOTA); - - if (test_opt(sbi, PRJQUOTA) && - F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) - clear_opt(sbi, PRJQUOTA); - - if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || - test_opt(sbi, PRJQUOTA)) { - f2fs_err(sbi, "old and new quota format mixing"); - return -1; - } - - if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_err(sbi, "journaled quota format not specified"); - return -1; - } - } + int i; - if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); - F2FS_OPTION(sbi).s_jquota_fmt = 0; - } - return 0; + for (i = 0; i < MAXQUOTAS; i++) + f2fs_unnote_qf_name(fc, i); } #endif -static int f2fs_set_test_dummy_encryption(struct super_block *sb, - const char *opt, - const substring_t *arg, - bool is_remount) +static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param, + struct f2fs_fs_context *ctx) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct fs_parameter param = { - .type = fs_value_is_string, - .string = arg->from ? arg->from : "", - }; - struct fscrypt_dummy_policy *policy = - &F2FS_OPTION(sbi).dummy_enc_policy; int err; if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { - f2fs_warn(sbi, "test_dummy_encryption option not supported"); - return -EINVAL; - } - - if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, "Encrypt feature is off"); - return -EINVAL; - } - - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if (is_remount && !fscrypt_is_dummy_policy_set(policy)) { - f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); + f2fs_warn(NULL, "test_dummy_encryption option not supported"); return -EINVAL; } - - err = fscrypt_parse_test_dummy_encryption(¶m, policy); + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->info.dummy_enc_policy); if (err) { - if (err == -EEXIST) - f2fs_warn(sbi, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", - opt); + if (err == -EINVAL) + f2fs_warn(NULL, "Value of option \"%s\" is unrecognized", + param->key); + else if (err == -EEXIST) + f2fs_warn(NULL, "Conflicting test_dummy_encryption options"); else - f2fs_warn(sbi, "Error processing option \"%s\" [%d]", - opt, err); + f2fs_warn(NULL, "Error processing option \"%s\" [%d]", + param->key, err); return -EINVAL; } - f2fs_warn(sbi, "Test dummy encryption mode enabled"); return 0; } #ifdef CONFIG_F2FS_FS_COMPRESSION +static bool is_compress_extension_exist(struct f2fs_mount_info *info, + const char *new_ext, bool is_ext) +{ + unsigned char (*ext)[F2FS_EXTENSION_LEN]; + int ext_cnt; + int i; + + if (is_ext) { + ext = info->extensions; + ext_cnt = info->compress_ext_cnt; + } else { + ext = info->noextensions; + ext_cnt = info->nocompress_ext_cnt; + } + + for (i = 0; i < ext_cnt; i++) { + if (!strcasecmp(new_ext, ext[i])) + return true; + } + + return false; +} + /* * 1. The same extension name cannot not appear in both compress and non-compress extension * at the same time. @@ -554,28 +625,28 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb, * extension will be treated as special cases and will not be compressed. * 3. Don't allow the non-compress extension specifies all files. */ -static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) +static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN], + int noext_cnt, + unsigned char (*ext)[F2FS_EXTENSION_LEN], + int ext_cnt) { - unsigned char (*ext)[F2FS_EXTENSION_LEN]; - unsigned char (*noext)[F2FS_EXTENSION_LEN]; - int ext_cnt, noext_cnt, index = 0, no_index = 0; - - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + int index = 0, no_index = 0; if (!noext_cnt) return 0; for (no_index = 0; no_index < noext_cnt; no_index++) { + if (strlen(noext[no_index]) == 0) + continue; if (!strcasecmp("*", noext[no_index])) { - f2fs_info(sbi, "Don't allow the nocompress extension specifies all files"); + f2fs_info(NULL, "Don't allow the nocompress extension specifies all files"); return -EINVAL; } for (index = 0; index < ext_cnt; index++) { + if (strlen(ext[index]) == 0) + continue; if (!strcasecmp(ext[index], noext[no_index])) { - f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension", + f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension", ext[index]); return -EINVAL; } @@ -585,751 +656,866 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi) } #ifdef CONFIG_F2FS_FS_LZ4 -static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_lz4hc_level(struct f2fs_fs_context *ctx, const char *str) { #ifdef CONFIG_F2FS_FS_LZ4HC unsigned int level; if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = LZ4HC_DEFAULT_CLEVEL; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += 3; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>"); + f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>"); return -EINVAL; } if (kstrtouint(str + 1, 10, &level)) return -EINVAL; if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { - f2fs_info(sbi, "invalid lz4hc compress level: %d", level); + f2fs_info(NULL, "invalid lz4hc compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; #else if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_level = 0; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } - f2fs_info(sbi, "kernel doesn't support lz4hc compression"); + f2fs_info(NULL, "kernel doesn't support lz4hc compression"); return -EINVAL; #endif } #endif #ifdef CONFIG_F2FS_FS_ZSTD -static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) +static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { - F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } str += len; if (str[0] != ':') { - f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>"); + f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(NULL, "do not support negative compress level: %d", level); + return -ERANGE; + } + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { - f2fs_info(sbi, "invalid zstd compress level: %d", level); + f2fs_info(NULL, "invalid zstd compress level: %d", level); return -EINVAL; } - F2FS_OPTION(sbi).compress_level = level; + F2FS_CTX_INFO(ctx).compress_level = level; + ctx->spec_mask |= F2FS_SPEC_compress_level; return 0; } #endif #endif -static int parse_options(struct super_block *sb, char *options, bool is_remount) +static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); - substring_t args[MAX_OPT_ARGS]; + struct f2fs_fs_context *ctx = fc->fs_private; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; unsigned char (*noext)[F2FS_EXTENSION_LEN]; int ext_cnt, noext_cnt; + char *name; #endif - char *p, *name; - int arg = 0; - kuid_t uid; - kgid_t gid; - int ret; - - if (!options) - goto default_check; - - while ((p = strsep(&options, ",")) != NULL) { - int token; + substring_t args[MAX_OPT_ARGS]; + struct fs_parse_result result; + int token, ret, arg; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); + token = fs_parse(fc, f2fs_param_specs, param, &result); + if (token < 0) + return token; - switch (token) { - case Opt_gc_background: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "on")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (!strcmp(name, "off")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (!strcmp(name, "sync")) { - F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_norecovery: - /* this option mounts f2fs with ro */ - set_opt(sbi, NORECOVERY); - if (!f2fs_readonly(sb)) - return -EINVAL; - break; - case Opt_discard: - if (!f2fs_hw_support_discard(sbi)) { - f2fs_warn(sbi, "device does not support discard"); - break; - } - set_opt(sbi, DISCARD); - break; - case Opt_nodiscard: - if (f2fs_hw_should_discard(sbi)) { - f2fs_warn(sbi, "discard is required for zoned block devices"); - return -EINVAL; - } - clear_opt(sbi, DISCARD); - break; - case Opt_noheap: - set_opt(sbi, NOHEAP); - break; - case Opt_heap: - clear_opt(sbi, NOHEAP); - break; + switch (token) { + case Opt_gc_background: + F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_background_gc; + break; + case Opt_disable_roll_forward: + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD); + break; + case Opt_norecovery: + /* requires ro mount, checked in f2fs_validate_options */ + ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY); + break; + case Opt_discard: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + else + ctx_set_opt(ctx, F2FS_MOUNT_DISCARD); + break; + case Opt_noheap: + case Opt_heap: + f2fs_warn(NULL, "heap/no_heap options were deprecated"); + break; #ifdef CONFIG_F2FS_FS_XATTR - case Opt_user_xattr: - set_opt(sbi, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; - case Opt_inline_xattr: - set_opt(sbi, INLINE_XATTR); - break; - case Opt_noinline_xattr: - clear_opt(sbi, INLINE_XATTR); - break; - case Opt_inline_xattr_size: - if (args->from && match_int(args, &arg)) - return -EINVAL; - set_opt(sbi, INLINE_XATTR_SIZE); - F2FS_OPTION(sbi).inline_xattr_size = arg; - break; + case Opt_user_xattr: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER); + else + ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER); + break; + case Opt_inline_xattr: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR); + break; + case Opt_inline_xattr_size: + if (result.int_32 < MIN_INLINE_XATTR_SIZE || + result.int_32 > MAX_INLINE_XATTR_SIZE) { + f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u", + (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE); + return -EINVAL; + } + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE); + F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32; + ctx->spec_mask |= F2FS_SPEC_inline_xattr_size; + break; #else - case Opt_user_xattr: - f2fs_info(sbi, "user_xattr options not supported"); - break; - case Opt_nouser_xattr: - f2fs_info(sbi, "nouser_xattr options not supported"); - break; - case Opt_inline_xattr: - f2fs_info(sbi, "inline_xattr options not supported"); - break; - case Opt_noinline_xattr: - f2fs_info(sbi, "noinline_xattr options not supported"); - break; + case Opt_user_xattr: + case Opt_inline_xattr: + case Opt_inline_xattr_size: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; + case Opt_acl: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL); + else + ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL); + break; #else - case Opt_acl: - f2fs_info(sbi, "acl options not supported"); - break; - case Opt_noacl: - f2fs_info(sbi, "noacl options not supported"); - break; + case Opt_acl: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && - arg != NR_CURSEG_PERSIST_TYPE) - return -EINVAL; - F2FS_OPTION(sbi).active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - case Opt_inline_data: - set_opt(sbi, INLINE_DATA); - break; - case Opt_inline_dentry: - set_opt(sbi, INLINE_DENTRY); - break; - case Opt_noinline_dentry: - clear_opt(sbi, INLINE_DENTRY); - break; - case Opt_flush_merge: - set_opt(sbi, FLUSH_MERGE); - break; - case Opt_noflush_merge: - clear_opt(sbi, FLUSH_MERGE); - break; - case Opt_nobarrier: - set_opt(sbi, NOBARRIER); - break; - case Opt_barrier: - clear_opt(sbi, NOBARRIER); - break; - case Opt_fastboot: - set_opt(sbi, FASTBOOT); - break; - case Opt_extent_cache: - set_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noextent_cache: - clear_opt(sbi, READ_EXTENT_CACHE); - break; - case Opt_noinline_data: - clear_opt(sbi, INLINE_DATA); - break; - case Opt_data_flush: - set_opt(sbi, DATA_FLUSH); - break; - case Opt_reserve_root: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_info(sbi, "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); - } else { - F2FS_OPTION(sbi).root_reserved_blocks = arg; - set_opt(sbi, RESERVE_ROOT); - } - break; - case Opt_resuid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - uid = make_kuid(current_user_ns(), arg); - if (!uid_valid(uid)) { - f2fs_err(sbi, "Invalid uid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resuid = uid; - break; - case Opt_resgid: - if (args->from && match_int(args, &arg)) - return -EINVAL; - gid = make_kgid(current_user_ns(), arg); - if (!gid_valid(gid)) { - f2fs_err(sbi, "Invalid gid value %d", arg); - return -EINVAL; - } - F2FS_OPTION(sbi).s_resgid = gid; - break; - case Opt_mode: - name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "adaptive")) { - if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (!strcmp(name, "lfs")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - } else if (!strcmp(name, "fragment:segment")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; - } else if (!strcmp(name, "fragment:block")) { - F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_io_size_bits: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { - f2fs_warn(sbi, "Not support %ld, larger than %d", - BIT(arg), BIO_MAX_VECS); - return -EINVAL; - } - F2FS_OPTION(sbi).write_io_size_bits = arg; - break; + case Opt_active_logs: + if (result.int_32 != 2 && result.int_32 != 4 && + result.int_32 != NR_CURSEG_PERSIST_TYPE) + return -EINVAL; + ctx->spec_mask |= F2FS_SPEC_active_logs; + F2FS_CTX_INFO(ctx).active_logs = result.int_32; + break; + case Opt_disable_ext_identify: + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY); + break; + case Opt_inline_data: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DATA); + break; + case Opt_inline_dentry: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); + else + ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY); + break; + case Opt_flush_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); + else + ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE); + break; + case Opt_barrier: + if (result.negated) + ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER); + else + ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER); + break; + case Opt_fastboot: + ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT); + break; + case Opt_extent_cache: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); + else + ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE); + break; + case Opt_data_flush: + ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH); + break; + case Opt_reserve_root: + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_root; + break; + case Opt_reserve_node: + ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_NODE); + F2FS_CTX_INFO(ctx).root_reserved_nodes = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_reserve_node; + break; + case Opt_resuid: + F2FS_CTX_INFO(ctx).s_resuid = result.uid; + ctx->spec_mask |= F2FS_SPEC_resuid; + break; + case Opt_resgid: + F2FS_CTX_INFO(ctx).s_resgid = result.gid; + ctx->spec_mask |= F2FS_SPEC_resgid; + break; + case Opt_mode: + F2FS_CTX_INFO(ctx).fs_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_mode; + break; #ifdef CONFIG_F2FS_FAULT_INJECTION - case Opt_fault_injection: - if (args->from && match_int(args, &arg)) - return -EINVAL; - f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_injection: + F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32; + ctx->spec_mask |= F2FS_SPEC_fault_injection; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); + break; - case Opt_fault_type: - if (args->from && match_int(args, &arg)) - return -EINVAL; - f2fs_build_fault_attr(sbi, 0, arg); - set_opt(sbi, FAULT_INJECTION); - break; + case Opt_fault_type: + if (result.uint_32 > BIT(FAULT_MAX)) + return -EINVAL; + F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fault_type; + ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION); + break; #else - case Opt_fault_injection: - f2fs_info(sbi, "fault_injection options not supported"); - break; - - case Opt_fault_type: - f2fs_info(sbi, "fault_type options not supported"); - break; + case Opt_fault_injection: + case Opt_fault_type: + f2fs_info(NULL, "%s options not supported", param->key); + break; #endif - case Opt_lazytime: - sb->s_flags |= SB_LAZYTIME; - break; - case Opt_nolazytime: - sb->s_flags &= ~SB_LAZYTIME; - break; + case Opt_lazytime: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME); + else + ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME); + break; #ifdef CONFIG_QUOTA - case Opt_quota: - case Opt_usrquota: - set_opt(sbi, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi, GRPQUOTA); - break; - case Opt_prjquota: - set_opt(sbi, PRJQUOTA); - break; - case Opt_usrjquota: - ret = f2fs_set_qf_name(sb, USRQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_grpjquota: - ret = f2fs_set_qf_name(sb, GRPQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_prjjquota: - ret = f2fs_set_qf_name(sb, PRJQUOTA, &args[0]); - if (ret) - return ret; - break; - case Opt_offusrjquota: - ret = f2fs_clear_qf_name(sb, USRQUOTA); - if (ret) - return ret; - break; - case Opt_offgrpjquota: - ret = f2fs_clear_qf_name(sb, GRPQUOTA); - if (ret) - return ret; - break; - case Opt_offprjjquota: - ret = f2fs_clear_qf_name(sb, PRJQUOTA); - if (ret) - return ret; - break; - case Opt_jqfmt_vfsold: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; - break; - case Opt_jqfmt_vfsv0: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; - break; - case Opt_jqfmt_vfsv1: - F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; - break; - case Opt_noquota: - clear_opt(sbi, QUOTA); - clear_opt(sbi, USRQUOTA); - clear_opt(sbi, GRPQUOTA); - clear_opt(sbi, PRJQUOTA); - break; + case Opt_quota: + if (result.negated) { + ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + } else + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); + break; + case Opt_usrquota: + ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA); + break; + case Opt_grpquota: + ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA); + break; + case Opt_prjquota: + ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); + break; + case Opt_usrjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, USRQUOTA); + else + ret = f2fs_note_qf_name(fc, USRQUOTA, param); + if (ret) + return ret; + break; + case Opt_grpjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); + else + ret = f2fs_note_qf_name(fc, GRPQUOTA, param); + if (ret) + return ret; + break; + case Opt_prjjquota: + if (!*param->string) + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); + else + ret = f2fs_note_qf_name(fc, PRJQUOTA, param); + if (ret) + return ret; + break; + case Opt_jqfmt: + F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32; + ctx->spec_mask |= F2FS_SPEC_jqfmt; + break; #else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - case Opt_prjquota: - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_prjjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_offprjjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - case Opt_noquota: - f2fs_info(sbi, "quota operations not supported"); - break; + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + f2fs_info(NULL, "quota operations not supported"); + break; #endif - case Opt_alloc: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (!strcmp(name, "default")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (!strcmp(name, "reuse")) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_fsync: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "posix")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (!strcmp(name, "strict")) { - F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (!strcmp(name, "nobarrier")) { - F2FS_OPTION(sbi).fsync_mode = - FSYNC_MODE_NOBARRIER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sb, p, &args[0], - is_remount); - if (ret) - return ret; - break; - case Opt_inlinecrypt: + case Opt_alloc: + F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_alloc_mode; + break; + case Opt_fsync: + F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_fsync_mode; + break; + case Opt_test_dummy_encryption: + ret = f2fs_parse_test_dummy_encryption(param, ctx); + if (ret) + return ret; + break; + case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - sb->s_flags |= SB_INLINECRYPT; + ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT); #else - f2fs_info(sbi, "inline encryption not supported"); + f2fs_info(NULL, "inline encryption not supported"); #endif - break; + break; + case Opt_checkpoint: + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].from = args[0].to = NULL; + arg = 0; + + /* revert to match_table for checkpoint= options */ + token = match_token(param->string, f2fs_checkpoint_tokens, args); + switch (token) { case Opt_checkpoint_disable_cap_perc: if (args->from && match_int(args, &arg)) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap_perc = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap_perc = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: if (args->from && match_int(args, &arg)) return -EINVAL; - F2FS_OPTION(sbi).unusable_cap = arg; - set_opt(sbi, DISABLE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap = arg; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap; + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable: - set_opt(sbi, DISABLE_CHECKPOINT); + ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; case Opt_checkpoint_enable: - clear_opt(sbi, DISABLE_CHECKPOINT); - break; - case Opt_checkpoint_merge: - set_opt(sbi, MERGE_CHECKPOINT); - break; - case Opt_nocheckpoint_merge: - clear_opt(sbi, MERGE_CHECKPOINT); + F2FS_CTX_INFO(ctx).unusable_cap_perc = 0; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc; + F2FS_CTX_INFO(ctx).unusable_cap = 0; + ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap; + ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT); break; + default: + return -EINVAL; + } + break; + case Opt_checkpoint_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); + else + ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT); + break; #ifdef CONFIG_F2FS_FS_COMPRESSION - case Opt_compress_algorithm: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "lzo")) { + case Opt_compress_algorithm: + name = param->string; + if (!strcmp(name, "lzo")) { #ifdef CONFIG_F2FS_FS_LZO - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZO; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lzo compression"); + f2fs_info(NULL, "kernel doesn't support lzo compression"); #endif - } else if (!strncmp(name, "lz4", 3)) { + } else if (!strncmp(name, "lz4", 3)) { #ifdef CONFIG_F2FS_FS_LZ4 - ret = f2fs_set_lz4hc_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZ4; + ret = f2fs_set_lz4hc_level(ctx, name); + if (ret) + return -EINVAL; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lz4 compression"); + f2fs_info(NULL, "kernel doesn't support lz4 compression"); #endif - } else if (!strncmp(name, "zstd", 4)) { + } else if (!strncmp(name, "zstd", 4)) { #ifdef CONFIG_F2FS_FS_ZSTD - ret = f2fs_set_zstd_level(sbi, name); - if (ret) { - kfree(name); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_ZSTD; + ret = f2fs_set_zstd_level(ctx, name); + if (ret) + return -EINVAL; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support zstd compression"); + f2fs_info(NULL, "kernel doesn't support zstd compression"); #endif - } else if (!strcmp(name, "lzo-rle")) { + } else if (!strcmp(name, "lzo-rle")) { #ifdef CONFIG_F2FS_FS_LZORLE - F2FS_OPTION(sbi).compress_level = 0; - F2FS_OPTION(sbi).compress_algorithm = - COMPRESS_LZORLE; + F2FS_CTX_INFO(ctx).compress_level = 0; + F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE; + ctx->spec_mask |= F2FS_SPEC_compress_level; + ctx->spec_mask |= F2FS_SPEC_compress_algorithm; #else - f2fs_info(sbi, "kernel doesn't support lzorle compression"); + f2fs_info(NULL, "kernel doesn't support lzorle compression"); #endif - } else { - kfree(name); - return -EINVAL; - } - kfree(name); + } else + return -EINVAL; + break; + case Opt_compress_log_size: + if (result.uint_32 < MIN_COMPRESS_LOG_SIZE || + result.uint_32 > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(NULL, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_log_size; + break; + case Opt_compress_extension: + name = param->string; + ext = F2FS_CTX_INFO(ctx).extensions; + ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(NULL, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(&ctx->info, name, true)) break; - case Opt_compress_log_size: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg < MIN_COMPRESS_LOG_SIZE || - arg > MAX_COMPRESS_LOG_SIZE) { - f2fs_err(sbi, - "Compress cluster log size is out of range"); - return -EINVAL; - } - F2FS_OPTION(sbi).compress_log_size = arg; + + ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_CTX_INFO(ctx).compress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_compress_extension; + break; + case Opt_nocompress_extension: + name = param->string; + noext = F2FS_CTX_INFO(ctx).noextensions; + noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + noext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(NULL, "invalid extension length/number"); + return -EINVAL; + } + + if (is_compress_extension_exist(&ctx->info, name, false)) break; - case Opt_compress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - ext = F2FS_OPTION(sbi).extensions; - ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN); + if (ret < 0) + return ret; + F2FS_CTX_INFO(ctx).nocompress_ext_cnt++; + ctx->spec_mask |= F2FS_SPEC_nocompress_extension; + break; + case Opt_compress_chksum: + F2FS_CTX_INFO(ctx).compress_chksum = true; + ctx->spec_mask |= F2FS_SPEC_compress_chksum; + break; + case Opt_compress_mode: + F2FS_CTX_INFO(ctx).compress_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_compress_mode; + break; + case Opt_compress_cache: + ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE); + break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + case Opt_nocompress_extension: + case Opt_compress_chksum: + case Opt_compress_mode: + case Opt_compress_cache: + f2fs_info(NULL, "compression options not supported"); + break; +#endif + case Opt_atgc: + ctx_set_opt(ctx, F2FS_MOUNT_ATGC); + break; + case Opt_gc_merge: + if (result.negated) + ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE); + else + ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE); + break; + case Opt_discard_unit: + F2FS_CTX_INFO(ctx).discard_unit = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_discard_unit; + break; + case Opt_memory_mode: + F2FS_CTX_INFO(ctx).memory_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_memory_mode; + break; + case Opt_age_extent_cache: + ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE); + break; + case Opt_errors: + F2FS_CTX_INFO(ctx).errors = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_errors; + break; + case Opt_nat_bits: + ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS); + break; + case Opt_lookup_mode: + F2FS_CTX_INFO(ctx).lookup_mode = result.uint_32; + ctx->spec_mask |= F2FS_SPEC_lookup_mode; + break; + } + return 0; +} - if (strlen(name) >= F2FS_EXTENSION_LEN || - ext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; - } +/* + * Check quota settings consistency. + */ +static int f2fs_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + #ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + bool quota_turnon = sb_any_quota_loaded(sb); + char *old_qname, *new_qname; + bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota; + int i; - strcpy(ext[ext_cnt], name); - F2FS_OPTION(sbi).compress_ext_cnt++; - kfree(name); - break; - case Opt_nocompress_extension: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) && + !f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); + return -EINVAL; + } - noext = F2FS_OPTION(sbi).noextensions; - noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + if (ctx->qname_mask) { + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; - if (strlen(name) >= F2FS_EXTENSION_LEN || - noext_cnt >= COMPRESS_EXT_NUM) { - f2fs_err(sbi, - "invalid extension length/number"); - kfree(name); - return -EINVAL; + old_qname = F2FS_OPTION(sbi).s_qf_names[i]; + new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (quota_turnon && + !!old_qname != !!new_qname) + goto err_jquota_change; + + if (old_qname) { + if (!new_qname) { + f2fs_info(sbi, "remove qf_name %s", + old_qname); + continue; + } else if (strcmp(old_qname, new_qname) == 0) { + ctx->qname_mask &= ~(1 << i); + continue; + } + goto err_jquota_specified; } - strcpy(noext[noext_cnt], name); - F2FS_OPTION(sbi).nocompress_ext_cnt++; - kfree(name); - break; - case Opt_compress_chksum: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - F2FS_OPTION(sbi).compress_chksum = true; - break; - case Opt_compress_mode: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; + if (quota_feature) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); + ctx->qname_mask &= ~(1 << i); + kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]); + F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL; } - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "fs")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; - } else if (!strcmp(name, "user")) { - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_compress_cache: - if (!f2fs_sb_has_compression(sbi)) { - f2fs_info(sbi, "Image doesn't support compression"); - break; - } - set_opt(sbi, COMPRESS_CACHE); - break; + } + } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA]; + grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA]; + prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] || + F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA]; + usrquota = test_opt(sbi, USRQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA); + grpquota = test_opt(sbi, GRPQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA); + prjquota = test_opt(sbi, PRJQUOTA) || + ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA); + + if (usr_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA); + grpquota = false; + } + if (prj_qf_name) { + ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA); + prjquota = false; + } + if (usr_qf_name || grp_qf_name || prj_qf_name) { + if (grpquota || usrquota || prjquota) { + f2fs_err(sbi, "old and new quota format mixing"); + return -EINVAL; + } + if (!(ctx->spec_mask & F2FS_SPEC_jqfmt || + F2FS_OPTION(sbi).s_jquota_fmt)) { + f2fs_err(sbi, "journaled quota format not specified"); + return -EINVAL; + } + } + return 0; + +err_jquota_change: + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); + return -EINVAL; +err_jquota_specified: + f2fs_err(sbi, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; + #else - case Opt_compress_algorithm: - case Opt_compress_log_size: - case Opt_compress_extension: - case Opt_nocompress_extension: - case Opt_compress_chksum: - case Opt_compress_mode: - case Opt_compress_cache: - f2fs_info(sbi, "compression options not supported"); - break; + if (f2fs_readonly(sbi->sb)) + return 0; + if (f2fs_sb_has_quota_ino(sbi)) { + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + if (f2fs_sb_has_project_quota(sbi)) { + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } + + return 0; #endif - case Opt_atgc: - set_opt(sbi, ATGC); - break; - case Opt_gc_merge: - set_opt(sbi, GC_MERGE); - break; - case Opt_nogc_merge: - clear_opt(sbi, GC_MERGE); - break; - case Opt_discard_unit: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "block")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_BLOCK; - } else if (!strcmp(name, "segment")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SEGMENT; - } else if (!strcmp(name, "section")) { - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; - } else { - kfree(name); - return -EINVAL; - } - kfree(name); - break; - case Opt_memory_mode: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "normal")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_NORMAL; - } else if (!strcmp(name, "low")) { - F2FS_OPTION(sbi).memory_mode = - MEMORY_MODE_LOW; - } else { - kfree(name); - return -EINVAL; +} + +static int f2fs_check_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); + return -EINVAL; + } + + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy, + &F2FS_CTX_INFO(ctx).dummy_enc_policy)) + return 0; + f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount"); + return -EINVAL; + } + return 0; +} + +static inline bool test_compression_spec(unsigned int mask) +{ + return mask & (F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static inline void clear_compression_spec(struct f2fs_fs_context *ctx) +{ + ctx->spec_mask &= ~(F2FS_SPEC_compress_algorithm + | F2FS_SPEC_compress_log_size + | F2FS_SPEC_compress_extension + | F2FS_SPEC_nocompress_extension + | F2FS_SPEC_compress_chksum + | F2FS_SPEC_compress_mode); +} + +static int f2fs_check_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i, cnt; + + if (!f2fs_sb_has_compression(sbi)) { + if (test_compression_spec(ctx->spec_mask) || + ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE)) + f2fs_info(sbi, "Image doesn't support compression"); + clear_compression_spec(ctx); + ctx->opt_mask &= ~BIT(F2FS_MOUNT_COMPRESS_CACHE); + return 0; + } + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).extensions[i], true)) { + F2FS_CTX_INFO(ctx).extensions[i][0] = '\0'; + cnt--; } - kfree(name); - break; - case Opt_age_extent_cache: - set_opt(sbi, AGE_EXTENT_CACHE); - break; - case Opt_errors: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - if (!strcmp(name, "remount-ro")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_READONLY; - } else if (!strcmp(name, "continue")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_CONTINUE; - } else if (!strcmp(name, "panic")) { - F2FS_OPTION(sbi).errors = - MOUNT_ERRORS_PANIC; - } else { - kfree(name); - return -EINVAL; + } + if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid extension length/number"); + return -EINVAL; + } + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; i++) { + if (is_compress_extension_exist(&F2FS_OPTION(sbi), + F2FS_CTX_INFO(ctx).noextensions[i], false)) { + F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0'; + cnt--; } - kfree(name); - break; - default: - f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", - p); + } + if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) { + f2fs_err(sbi, "invalid noextension length/number"); return -EINVAL; } } -default_check: -#ifdef CONFIG_QUOTA - if (f2fs_check_quota_options(sbi)) + + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with new extensions"); return -EINVAL; -#else - if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + } + if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions, + F2FS_CTX_INFO(ctx).nocompress_ext_cnt, + F2FS_OPTION(sbi).extensions, + F2FS_OPTION(sbi).compress_ext_cnt)) { + f2fs_err(sbi, "new noextensions conflicts with old extensions"); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); + if (f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions, + F2FS_OPTION(sbi).nocompress_ext_cnt, + F2FS_CTX_INFO(ctx).extensions, + F2FS_CTX_INFO(ctx).compress_ext_cnt)) { + f2fs_err(sbi, "new extensions conflicts with old noextensions"); return -EINVAL; } #endif -#if !IS_ENABLED(CONFIG_UNICODE) - if (f2fs_sb_has_casefold(sbi)) { + return 0; +} + +static int f2fs_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb)) + return -EINVAL; + + if (f2fs_hw_should_discard(sbi) && + (ctx->opt_mask & BIT(F2FS_MOUNT_DISCARD)) && + !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "discard is required for zoned block devices"); + return -EINVAL; + } + + if (!f2fs_hw_support_discard(sbi) && + (ctx->opt_mask & BIT(F2FS_MOUNT_DISCARD)) && + ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) { + f2fs_warn(sbi, "device does not support discard"); + ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD); + ctx->opt_mask &= ~BIT(F2FS_MOUNT_DISCARD); + } + + if (f2fs_sb_has_device_alias(sbi) && + (ctx->opt_mask & BIT(F2FS_MOUNT_READ_EXTENT_CACHE)) && + !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } + + if (test_opt(sbi, RESERVE_ROOT) && + (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_ROOT)) && + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) { + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); + ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_ROOT); + } + if (test_opt(sbi, RESERVE_NODE) && + (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_NODE)) && + ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) { + f2fs_info(sbi, "Preserve previous reserve_node=%u", + F2FS_OPTION(sbi).root_reserved_nodes); + ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE); + ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_NODE); + } + + err = f2fs_check_test_dummy_encryption(fc, sb); + if (err) + return err; + + err = f2fs_check_compression(fc, sb); + if (err) + return err; + + err = f2fs_check_quota_consistency(fc, sb); + if (err) + return err; + + if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } -#endif + /* * The BLKZONED feature indicates that the drive was formatted with * zone alignment optimization. This is optional for host-aware * devices, but mandatory for host-managed zoned block devices. */ if (f2fs_sb_has_blkzoned(sbi)) { + if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) { + f2fs_warn(sbi, "zoned devices need bggc"); + return -EINVAL; + } #ifdef CONFIG_BLK_DEV_ZONED - if (F2FS_OPTION(sbi).discard_unit != - DISCARD_UNIT_SECTION) { + if ((ctx->spec_mask & F2FS_SPEC_discard_unit) && + F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) { f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default"); - F2FS_OPTION(sbi).discard_unit = - DISCARD_UNIT_SECTION; + F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION; + } + + if ((ctx->spec_mask & F2FS_SPEC_mode) && + F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) { + f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); + return -EINVAL; } #else f2fs_err(sbi, "Zoned block device support is not enabled"); @@ -1337,54 +1523,25 @@ default_check: #endif } -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_test_compress_extension(sbi)) { - f2fs_err(sbi, "invalid compress or nocompress extension"); - return -EINVAL; - } -#endif - - if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); - return -EINVAL; - } - - if (test_opt(sbi, INLINE_XATTR_SIZE)) { - int min_size, max_size; - + if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) { if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } - if (!test_opt(sbi, INLINE_XATTR)) { + if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) { f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } - - min_size = MIN_INLINE_XATTR_SIZE; - max_size = MAX_INLINE_XATTR_SIZE; - - if (F2FS_OPTION(sbi).inline_xattr_size < min_size || - F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", - min_size, max_size); - return -EINVAL; - } - } - - if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "LFS is not compatible with checkpoint=disable"); - return -EINVAL; } - if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { + if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) && + F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) { f2fs_err(sbi, "LFS is not compatible with ATGC"); return -EINVAL; } - if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { + if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) { f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); return -EINVAL; } @@ -1396,6 +1553,195 @@ default_check: return 0; } +static void f2fs_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + bool quota_feature = f2fs_sb_has_quota_ino(sbi); + char *qname; + int i; + + if (quota_feature) + return; + + for (i = 0; i < MAXQUOTAS; i++) { + if (!(ctx->qname_mask & (1 << i))) + continue; + + qname = F2FS_CTX_INFO(ctx).s_qf_names[i]; + if (qname) { + qname = kstrdup(F2FS_CTX_INFO(ctx).s_qf_names[i], + GFP_KERNEL | __GFP_NOFAIL); + set_opt(sbi, QUOTA); + } + F2FS_OPTION(sbi).s_qf_names[i] = qname; + } + + if (ctx->spec_mask & F2FS_SPEC_jqfmt) + F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt; + + if (quota_feature && F2FS_OPTION(sbi).s_jquota_fmt) { + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); + F2FS_OPTION(sbi).s_jquota_fmt = 0; + } +#endif +} + +static void f2fs_apply_test_dummy_encryption(struct fs_context *fc, + struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy)) + return; + swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy); + f2fs_warn(sbi, "Test dummy encryption mode enabled"); +} + +static void f2fs_apply_compression(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned char (*ctx_ext)[F2FS_EXTENSION_LEN]; + unsigned char (*sbi_ext)[F2FS_EXTENSION_LEN]; + int ctx_cnt, sbi_cnt, i; + + if (ctx->spec_mask & F2FS_SPEC_compress_level) + F2FS_OPTION(sbi).compress_level = + F2FS_CTX_INFO(ctx).compress_level; + if (ctx->spec_mask & F2FS_SPEC_compress_algorithm) + F2FS_OPTION(sbi).compress_algorithm = + F2FS_CTX_INFO(ctx).compress_algorithm; + if (ctx->spec_mask & F2FS_SPEC_compress_log_size) + F2FS_OPTION(sbi).compress_log_size = + F2FS_CTX_INFO(ctx).compress_log_size; + if (ctx->spec_mask & F2FS_SPEC_compress_chksum) + F2FS_OPTION(sbi).compress_chksum = + F2FS_CTX_INFO(ctx).compress_chksum; + if (ctx->spec_mask & F2FS_SPEC_compress_mode) + F2FS_OPTION(sbi).compress_mode = + F2FS_CTX_INFO(ctx).compress_mode; + if (ctx->spec_mask & F2FS_SPEC_compress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).extensions; + ctx_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).extensions; + sbi_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).compress_ext_cnt = sbi_cnt; + } + if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) { + ctx_ext = F2FS_CTX_INFO(ctx).noextensions; + ctx_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt; + sbi_ext = F2FS_OPTION(sbi).noextensions; + sbi_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; + for (i = 0; i < ctx_cnt; i++) { + if (strlen(ctx_ext[i]) == 0) + continue; + strscpy(sbi_ext[sbi_cnt], ctx_ext[i]); + sbi_cnt++; + } + F2FS_OPTION(sbi).nocompress_ext_cnt = sbi_cnt; + } +#endif +} + +static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + F2FS_OPTION(sbi).opt &= ~ctx->opt_mask; + F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt; + + if (ctx->spec_mask & F2FS_SPEC_background_gc) + F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode; + if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size) + F2FS_OPTION(sbi).inline_xattr_size = + F2FS_CTX_INFO(ctx).inline_xattr_size; + if (ctx->spec_mask & F2FS_SPEC_active_logs) + F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs; + if (ctx->spec_mask & F2FS_SPEC_reserve_root) + F2FS_OPTION(sbi).root_reserved_blocks = + F2FS_CTX_INFO(ctx).root_reserved_blocks; + if (ctx->spec_mask & F2FS_SPEC_reserve_node) + F2FS_OPTION(sbi).root_reserved_nodes = + F2FS_CTX_INFO(ctx).root_reserved_nodes; + if (ctx->spec_mask & F2FS_SPEC_resgid) + F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid; + if (ctx->spec_mask & F2FS_SPEC_resuid) + F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid; + if (ctx->spec_mask & F2FS_SPEC_mode) + F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (ctx->spec_mask & F2FS_SPEC_fault_injection) + (void)f2fs_build_fault_attr(sbi, + F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE); + if (ctx->spec_mask & F2FS_SPEC_fault_type) + (void)f2fs_build_fault_attr(sbi, 0, + F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE); +#endif + if (ctx->spec_mask & F2FS_SPEC_alloc_mode) + F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode; + if (ctx->spec_mask & F2FS_SPEC_fsync_mode) + F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap) + F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap; + if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc) + F2FS_OPTION(sbi).unusable_cap_perc = + F2FS_CTX_INFO(ctx).unusable_cap_perc; + if (ctx->spec_mask & F2FS_SPEC_discard_unit) + F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit; + if (ctx->spec_mask & F2FS_SPEC_memory_mode) + F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode; + if (ctx->spec_mask & F2FS_SPEC_errors) + F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors; + if (ctx->spec_mask & F2FS_SPEC_lookup_mode) + F2FS_OPTION(sbi).lookup_mode = F2FS_CTX_INFO(ctx).lookup_mode; + + f2fs_apply_compression(fc, sb); + f2fs_apply_test_dummy_encryption(fc, sb); + f2fs_apply_quota_options(fc, sb); +} + +static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount) +{ + if (f2fs_sb_has_device_alias(sbi) && + !test_opt(sbi, READ_EXTENT_CACHE)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } + + if (!remount) + return 0; + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } +#endif + if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { + f2fs_warn(sbi, "LFS is not compatible with IPU"); + return -EINVAL; + } + return 0; +} + static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; @@ -1412,10 +1758,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); + atomic_set(&fi->open_count, 0); + atomic_set(&fi->writeback, 0); init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->gdonate_list); init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); init_f2fs_rwsem(&fi->i_xattr_sem); @@ -1450,10 +1799,10 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) { + if ((!inode_unhashed(inode) && inode_state_read(inode) & I_SYNC)) { if (!inode->i_nlink && !is_bad_inode(inode)) { /* to avoid evict_inode call simultaneously */ - atomic_inc(&inode->i_count); + __iget(inode); spin_unlock(&inode->i_lock); /* should remain fi->extent_tree for writepage */ @@ -1477,7 +1826,7 @@ static int f2fs_drop_inode(struct inode *inode) trace_f2fs_drop_inode(inode, 0); return 0; } - ret = generic_drop_inode(inode); + ret = inode_generic_drop(inode); if (!ret) ret = fscrypt_drop_inode(inode); trace_f2fs_drop_inode(inode, ret); @@ -1502,6 +1851,12 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync) inc_page_count(sbi, F2FS_DIRTY_IMETA); } spin_unlock(&sbi->inode_lock[DIRTY_META]); + + /* if atomic write is not committed, set inode w/ atomic dirty */ + if (!ret && f2fs_is_atomic_file(inode) && + !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return ret; } @@ -1561,7 +1916,8 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, sbi->sb); + if (i > 0) + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif @@ -1600,6 +1956,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } @@ -1609,6 +1966,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } @@ -1626,19 +1984,11 @@ static void f2fs_put_super(struct super_block *sb) f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); - if (err) { + if (err || f2fs_cp_error(sbi)) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); } - for (i = 0; i < NR_COUNT_TYPE; i++) { - if (!get_pages(sbi, i)) - continue; - f2fs_err(sbi, "detect filesystem reference count leak during " - "umount, type: %d, count: %lld", i, get_pages(sbi, i)); - f2fs_bug_on(sbi, 1); - } - f2fs_bug_on(sbi, sbi->fsync_node_num); f2fs_destroy_compress_inode(sbi); @@ -1649,6 +1999,15 @@ static void f2fs_put_super(struct super_block *sb) iput(sbi->meta_inode); sbi->meta_inode = NULL; + /* Should check the page counts after dropping all node/meta pages */ + for (i = 0; i < NR_COUNT_TYPE; i++) { + if (!get_pages(sbi, i)) + continue; + f2fs_err(sbi, "detect filesystem reference count leak during " + "umount, type: %d, count: %lld", i, get_pages(sbi, i)); + f2fs_bug_on(sbi, 1); + } + /* * iput() can update stat information, if f2fs_write_checkpoint() * above failed with error. @@ -1666,15 +2025,9 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->ckpt); - sb->s_fs_info = NULL; - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); - destroy_device_list(sbi); f2fs_destroy_page_array_cache(sbi); - f2fs_destroy_xattr_caches(sbi); - mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); @@ -1683,11 +2036,10 @@ static void f2fs_put_super(struct super_block *sb) destroy_percpu_info(sbi); f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif - kfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1705,35 +2057,55 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return -EAGAIN; - if (sync) + if (sync) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_issue_checkpoint(sbi); + } return err; } static int f2fs_freeze(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + if (f2fs_readonly(sb)) return 0; /* IO error happened before */ - if (unlikely(f2fs_cp_error(F2FS_SB(sb)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; /* must be clean, since sync_filesystem() was already called */ - if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY)) + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY)) return -EINVAL; + sbi->umount_lock_holder = current; + /* Let's flush checkpoints and stop the thread. */ - f2fs_flush_ckpt_thread(F2FS_SB(sb)); + f2fs_flush_ckpt_thread(sbi); + + sbi->umount_lock_holder = NULL; /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ - set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); + set_sbi_flag(sbi, SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + /* + * It will update discard_max_bytes of mounted lvm device to zero + * after creating snapshot on this lvm device, let's drop all + * remained discards. + * We don't need to disable real-time discard because discard_max_bytes + * will recover after removal of snapshot. + */ + if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi)) + f2fs_issue_discard_timeout(sbi); + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -1755,26 +2127,32 @@ static int f2fs_statfs_project(struct super_block *sb, limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, dquot->dq_dqb.dqb_bhardlimit); - if (limit) - limit >>= sb->s_blocksize_bits; + limit >>= sb->s_blocksize_bits; + + if (limit) { + uint64_t remaining = 0; - if (limit && buf->f_blocks > limit) { curblock = (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; - buf->f_blocks = limit; - buf->f_bfree = buf->f_bavail = - (buf->f_blocks > curblock) ? - (buf->f_blocks - curblock) : 0; + if (limit > curblock) + remaining = limit - curblock; + + buf->f_blocks = min(buf->f_blocks, limit); + buf->f_bfree = min(buf->f_bfree, remaining); + buf->f_bavail = min(buf->f_bavail, remaining); } limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, dquot->dq_dqb.dqb_ihardlimit); - if (limit && buf->f_files > limit) { - buf->f_files = limit; - buf->f_ffree = - (buf->f_files > dquot->dq_dqb.dqb_curinodes) ? - (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; + if (limit) { + uint64_t remaining = 0; + + if (limit > dquot->dq_dqb.dqb_curinodes) + remaining = limit - dquot->dq_dqb.dqb_curinodes; + + buf->f_files = min(buf->f_files, limit); + buf->f_ffree = min(buf->f_ffree, remaining); } spin_unlock(&dquot->dq_dqb_lock); @@ -1800,7 +2178,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; spin_lock(&sbi->stat_lock); - + if (sbi->carve_out) + buf->f_blocks -= sbi->current_reserved_blocks; user_block_count = sbi->user_block_count; total_valid_node_count = valid_node_count(sbi); avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -1832,9 +2211,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid = u64_to_fsid(id); #ifdef CONFIG_QUOTA - if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) && sb_has_quota_limits_enabled(sb, PRJQUOTA)) { - f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf); } #endif return 0; @@ -1964,10 +2343,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } else { seq_puts(seq, ",nodiscard"); } - if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap"); - else - seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -2026,16 +2401,15 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) seq_puts(seq, "fragment:block"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); - if (test_opt(sbi, RESERVE_ROOT)) - seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", + if (test_opt(sbi, RESERVE_ROOT) || test_opt(sbi, RESERVE_NODE)) + seq_printf(seq, ",reserve_root=%u,reserve_node=%u,resuid=%u," + "resgid=%u", F2FS_OPTION(sbi).root_reserved_blocks, + F2FS_OPTION(sbi).root_reserved_nodes, from_kuid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resuid), from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); - if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_bits=%u", - F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", @@ -2099,6 +2473,16 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC) seq_printf(seq, ",errors=%s", "panic"); + if (test_opt(sbi, NAT_BITS)) + seq_puts(seq, ",nat_bits"); + + if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_PERF) + seq_show_option(seq, "lookup_mode", "perf"); + else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_COMPAT) + seq_show_option(seq, "lookup_mode", "compat"); + else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_AUTO) + seq_show_option(seq, "lookup_mode", "auto"); + return 0; } @@ -2142,15 +2526,12 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE; - sbi->sb->s_flags &= ~SB_INLINECRYPT; - set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, NOHEAP); set_opt(sbi, MERGE_CHECKPOINT); + set_opt(sbi, LAZYTIME); F2FS_OPTION(sbi).unusable_cap = 0; - sbi->sb->s_flags |= SB_LAZYTIME; if (!f2fs_is_readonly(sbi)) set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_has_blkzoned(sbi)) @@ -2165,7 +2546,9 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) set_opt(sbi, POSIX_ACL); #endif - f2fs_build_fault_attr(sbi, 0, 0); + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); + + F2FS_OPTION(sbi).lookup_mode = LOOKUP_PERF; } #ifdef CONFIG_QUOTA @@ -2202,9 +2585,11 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) .init_gc_type = FG_GC, .should_migrate_blocks = false, .err_gc_skipped = true, + .no_bg_gc = true, .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); if (err == -ENODATA) { err = 0; @@ -2230,6 +2615,7 @@ skip_gc: f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) goto out_unlock; @@ -2243,21 +2629,48 @@ out_unlock: restore_flag: sbi->gc_mode = gc_mode; sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ + f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err); return err; } -static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +static int f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { - int retry = DEFAULT_RETRY_IO_COUNT; + unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16; + long long start, writeback, lock, sync_inode, end; + int ret; + + f2fs_info(sbi, "%s start, meta: %lld, node: %lld, data: %lld", + __func__, + get_pages(sbi, F2FS_DIRTY_META), + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DATA)); + + f2fs_update_time(sbi, ENABLE_TIME); + + start = ktime_get(); /* we should flush all the data to keep data consistency */ - do { + while (get_pages(sbi, F2FS_DIRTY_DATA)) { + writeback_inodes_sb_nr(sbi->sb, nr_pages, WB_REASON_SYNC); + f2fs_io_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT); + + if (f2fs_time_over(sbi, ENABLE_TIME)) + break; + } + writeback = ktime_get(); + + f2fs_down_write(&sbi->cp_enable_rwsem); + + lock = ktime_get(); + + if (get_pages(sbi, F2FS_DIRTY_DATA)) sync_inodes_sb(sbi->sb); - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); - } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--); - if (unlikely(retry < 0)) - f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); + if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA))) + f2fs_warn(sbi, "%s: has some unwritten data: %lld", + __func__, get_pages(sbi, F2FS_DIRTY_DATA)); + + sync_inode = ktime_get(); f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); @@ -2266,30 +2679,53 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_IS_DIRTY); f2fs_up_write(&sbi->gc_lock); - f2fs_sync_fs(sbi->sb, 1); + f2fs_info(sbi, "%s sync_fs, meta: %lld, imeta: %lld, node: %lld, dents: %lld, qdata: %lld", + __func__, + get_pages(sbi, F2FS_DIRTY_META), + get_pages(sbi, F2FS_DIRTY_IMETA), + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_QDATA)); + ret = f2fs_sync_fs(sbi->sb, 1); + if (ret) + f2fs_err(sbi, "%s sync_fs failed, ret: %d", __func__, ret); /* Let's ensure there's no pending checkpoint anymore */ f2fs_flush_ckpt_thread(sbi); + + f2fs_up_write(&sbi->cp_enable_rwsem); + + end = ktime_get(); + + f2fs_info(sbi, "%s end, writeback:%llu, " + "lock:%llu, sync_inode:%llu, sync_fs:%llu", + __func__, + ktime_ms_delta(writeback, start), + ktime_ms_delta(lock, writeback), + ktime_ms_delta(sync_inode, lock), + ktime_ms_delta(end, sync_inode)); + return ret; } -static int f2fs_remount(struct super_block *sb, int *flags, char *data) +static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; + unsigned int flags = fc->sb_flags; int err; bool need_restart_gc = false, need_stop_gc = false; - bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; bool need_restart_discard = false, need_stop_discard = false; + bool need_enable_checkpoint = false, need_disable_checkpoint = false; bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); - bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool block_unit_discard = f2fs_block_unit_discard(sbi); + bool no_nat_bits = !test_opt(sbi, NAT_BITS); #ifdef CONFIG_QUOTA int i, j; #endif @@ -2301,6 +2737,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; + sbi->umount_lock_holder = current; + #ifdef CONFIG_QUOTA org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { @@ -2320,7 +2758,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #endif /* recover superblocks we couldn't write due to previous RO mount */ - if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { + if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", err); @@ -2330,8 +2768,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); - /* parse mount options */ - err = parse_options(sb, data, true); + err = f2fs_check_opt_consistency(fc, sb); + if (err) + goto restore_opts; + + f2fs_apply_options(fc, sb); + + err = f2fs_sanity_check_options(sbi, true); if (err) goto restore_opts; @@ -2342,20 +2785,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) + if (f2fs_readonly(sb) && (flags & SB_RDONLY)) goto skip; - if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) { + if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) { err = -EROFS; goto restore_opts; } #ifdef CONFIG_QUOTA - if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) { + if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { + } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { @@ -2367,12 +2810,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } #endif - if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "LFS is not compatible with IPU"); - goto restore_opts; - } - /* disallow enable atgc dynamically */ if (no_atgc == !!test_opt(sbi, ATGC)) { err = -EINVAL; @@ -2393,12 +2830,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "switch io_bits option is not allowed"); - goto restore_opts; - } - if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch compress_cache option is not allowed"); @@ -2411,7 +2842,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + if (no_nat_bits == !!test_opt(sbi, NAT_BITS)) { + err = -EINVAL; + f2fs_warn(sbi, "switch nat_bits option is not allowed"); + goto restore_opts; + } + + if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; @@ -2422,7 +2859,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & SB_RDONLY) || + if ((flags & SB_RDONLY) || (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { @@ -2436,7 +2873,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (flags & SB_RDONLY) { sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2445,36 +2882,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } - if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || - !test_opt(sbi, MERGE_CHECKPOINT)) { - f2fs_stop_ckpt_thread(sbi); - need_restart_ckpt = true; - } else { - /* Flush if the prevous checkpoint, if exists. */ - f2fs_flush_ckpt_thread(sbi); - - err = f2fs_start_ckpt_thread(sbi); - if (err) { - f2fs_err(sbi, - "Failed to start F2FS issue_checkpoint_thread (%d)", - err); - goto restore_gc; - } - need_stop_ckpt = true; - } - /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); need_restart_flush = true; } else { err = f2fs_create_flush_cmd_control(sbi); if (err) - goto restore_ckpt; + goto restore_gc; need_stop_flush = true; } @@ -2491,13 +2910,39 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } + adjust_unusable_cap_perc(sbi); if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) { if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) goto restore_discard; + need_enable_checkpoint = true; } else { - f2fs_enable_checkpoint(sbi); + err = f2fs_enable_checkpoint(sbi); + if (err) + goto restore_discard; + need_disable_checkpoint = true; + } + } + + /* + * Place this routine at the end, since a new checkpoint would be + * triggered while remount and we need to take care of it before + * returning from remount. + */ + if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) || + !test_opt(sbi, MERGE_CHECKPOINT)) { + f2fs_stop_ckpt_thread(sbi); + } else { + /* Flush if the previous checkpoint, if exists. */ + f2fs_flush_ckpt_thread(sbi); + + err = f2fs_start_ckpt_thread(sbi); + if (err) { + f2fs_err(sbi, + "Failed to start F2FS issue_checkpoint_thread (%d)", + err); + goto restore_checkpoint; } } @@ -2512,9 +2957,18 @@ skip: (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); - adjust_unusable_cap_perc(sbi); - *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + + sbi->umount_lock_holder = NULL; return 0; +restore_checkpoint: + if (need_enable_checkpoint) { + if (f2fs_enable_checkpoint(sbi)) + f2fs_warn(sbi, "checkpoint has not been enabled"); + } else if (need_disable_checkpoint) { + if (f2fs_disable_checkpoint(sbi)) + f2fs_warn(sbi, "checkpoint has not been disabled"); + } restore_discard: if (need_restart_discard) { if (f2fs_start_discard_thread(sbi)) @@ -2530,13 +2984,6 @@ restore_flush: clear_opt(sbi, FLUSH_MERGE); f2fs_destroy_flush_cmd_control(sbi, false); } -restore_ckpt: - if (need_restart_ckpt) { - if (f2fs_start_ckpt_thread(sbi)) - f2fs_warn(sbi, "background ckpt thread has stopped"); - } else if (need_stop_ckpt) { - f2fs_stop_ckpt_thread(sbi); - } restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) @@ -2554,9 +3001,16 @@ restore_opts: #endif sbi->mount_opt = org_mount_opt; sb->s_flags = old_sb_flags; + + sbi->umount_lock_holder = NULL; return err; } +static void f2fs_shutdown(struct super_block *sb) +{ + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); +} + #ifdef CONFIG_QUOTA static bool f2fs_need_recovery(struct f2fs_sb_info *sbi) { @@ -2612,12 +3066,9 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, { struct inode *inode = sb_dqopt(sb)->files[type]; struct address_space *mapping = inode->i_mapping; - block_t blkidx = F2FS_BYTES_TO_BLK(off); - int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; loff_t i_size = i_size_read(inode); - struct page *page; if (off > i_size) return 0; @@ -2626,37 +3077,42 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, len = i_size - off; toread = len; while (toread > 0) { - tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); + struct folio *folio; + size_t offset; + repeat: - page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -ENOMEM) { + folio = mapping_read_folio_gfp(mapping, off >> PAGE_SHIFT, + GFP_NOFS); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - return PTR_ERR(page); + return PTR_ERR(folio); } + offset = offset_in_folio(folio, off); + tocopy = min(folio_size(folio) - offset, toread); - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + if (unlikely(folio->mapping != mapping)) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - return -EIO; - } - memcpy_from_page(data, page, offset, tocopy); - f2fs_put_page(page, 1); + /* + * should never happen, just leave f2fs_bug_on() here to catch + * any potential bug. + */ + f2fs_bug_on(F2FS_SB(sb), !folio_test_uptodate(folio)); + + memcpy_from_folio(data, folio, offset, tocopy); + f2fs_folio_put(folio, true); - offset = 0; toread -= tocopy; data += tocopy; - blkidx++; + off += tocopy; } return len; } @@ -2670,7 +3126,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, const struct address_space_operations *a_ops = mapping->a_ops; int offset = off & (sb->s_blocksize - 1); size_t towrite = len; - struct page *page; + struct folio *folio; void *fsdata = NULL; int err = 0; int tocopy; @@ -2680,20 +3136,20 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, - &page, &fsdata); + &folio, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { - f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + memalloc_retry_wait(GFP_NOFS); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; } - memcpy_to_page(page, offset, data, tocopy); + memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, fsdata); + folio, fsdata); offset = 0; towrite -= tocopy; off += tocopy; @@ -2703,7 +3159,7 @@ retry: if (len == towrite) return err; - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; } @@ -2716,7 +3172,7 @@ int f2fs_dquot_initialize(struct inode *inode) return dquot_initialize(inode); } -static struct dquot **f2fs_get_dquots(struct inode *inode) +static struct dquot __rcu **f2fs_get_dquots(struct inode *inode) { return F2FS_I(inode)->i_dquot; } @@ -2865,7 +3321,7 @@ out: return ret; } -int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_do_quota_sync(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); @@ -2913,11 +3369,21 @@ int f2fs_quota_sync(struct super_block *sb, int type) return ret; } +static int f2fs_quota_sync(struct super_block *sb, int type) +{ + int ret; + + F2FS_SB(sb)->umount_lock_holder = current; + ret = f2fs_do_quota_sync(sb, type); + F2FS_SB(sb)->umount_lock_holder = NULL; + return ret; +} + static int f2fs_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { struct inode *inode; - int err; + int err = 0; /* if quota sysfile exists, deny enabling quota with specific file */ if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) { @@ -2928,31 +3394,34 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, if (path->dentry->d_sb != sb) return -EXDEV; - err = f2fs_quota_sync(sb, type); + F2FS_SB(sb)->umount_lock_holder = current; + + err = f2fs_do_quota_sync(sb, type); if (err) - return err; + goto out; inode = d_inode(path->dentry); err = filemap_fdatawrite(inode->i_mapping); if (err) - return err; + goto out; err = filemap_fdatawait(inode->i_mapping); if (err) - return err; + goto out; err = dquot_quota_on(sb, type, format_id, path); if (err) - return err; + goto out; inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_QUOTA_DEFAULT_FL; f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); - - return 0; +out: + F2FS_SB(sb)->umount_lock_holder = NULL; + return err; } static int __f2fs_quota_off(struct super_block *sb, int type) @@ -2963,7 +3432,7 @@ static int __f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - err = f2fs_quota_sync(sb, type); + err = f2fs_do_quota_sync(sb, type); if (err) goto out_put; @@ -2986,6 +3455,8 @@ static int f2fs_quota_off(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err; + F2FS_SB(sb)->umount_lock_holder = current; + err = __f2fs_quota_off(sb, type); /* @@ -2995,6 +3466,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + F2FS_SB(sb)->umount_lock_holder = NULL; + return err; } @@ -3127,7 +3601,7 @@ int f2fs_dquot_initialize(struct inode *inode) return 0; } -int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_do_quota_sync(struct super_block *sb, int type) { return 0; } @@ -3155,7 +3629,7 @@ static const struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, - .remount_fs = f2fs_remount, + .shutdown = f2fs_shutdown, }; #ifdef CONFIG_FS_ENCRYPTION @@ -3196,13 +3670,6 @@ static bool f2fs_has_stable_inodes(struct super_block *sb) return true; } -static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret) -{ - *ino_bits_ret = 8 * sizeof(nid_t); - *lblk_bits_ret = 8 * sizeof(block_t); -} - static struct block_device **f2fs_get_devices(struct super_block *sb, unsigned int *num_devs) { @@ -3224,16 +3691,20 @@ static struct block_device **f2fs_get_devices(struct super_block *sb, } static const struct fscrypt_operations f2fs_cryptops = { - .key_prefix = "f2fs:", + .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_crypt_info) - + (int)offsetof(struct f2fs_inode_info, vfs_inode), + .needs_bounce_pages = 1, + .has_32bit_inodes = 1, + .supports_subblock_data_units = 1, + .legacy_key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, .get_dummy_policy = f2fs_get_dummy_policy, .empty_dir = f2fs_empty_dir, .has_stable_inodes = f2fs_has_stable_inodes, - .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, .get_devices = f2fs_get_devices, }; -#endif +#endif /* CONFIG_FS_ENCRYPTION */ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) @@ -3275,6 +3746,7 @@ static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, } static const struct export_operations f2fs_export_ops = { + .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = f2fs_fh_to_dentry, .fh_to_parent = f2fs_fh_to_parent, .get_parent = f2fs_get_parent, @@ -3308,27 +3780,54 @@ loff_t max_file_blocks(struct inode *inode) leaf_count *= NIDS_PER_BLOCK; result += leaf_count; + /* + * For compatibility with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{64,32} with + * a 4K crypto data unit, we must restrict the max filesize to what can + * fit within U32_MAX + 1 data units. + */ + + result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); + return result; } -static int __f2fs_commit_super(struct buffer_head *bh, - struct f2fs_super_block *super) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, + pgoff_t index, bool update) { - lock_buffer(bh); - if (super) - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_dirty(bh); - unlock_buffer(bh); - + struct bio *bio; /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA; + int ret; + + folio_lock(folio); + folio_wait_writeback(folio); + if (update) + memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi), + sizeof(struct f2fs_super_block)); + folio_mark_dirty(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); + + bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); + + /* it doesn't need to set crypto context for superblock update */ + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio->index); + + if (!bio_add_folio(bio, folio, folio_size(folio), 0)) + f2fs_bug_on(sbi, 1); + + ret = submit_bio_wait(bio); + bio_put(bio); + folio_end_writeback(folio); + + return ret; } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); struct super_block *sb = sbi->sb; u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); @@ -3344,9 +3843,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", @@ -3403,7 +3902,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { - err = __f2fs_commit_super(bh, NULL); + err = __f2fs_commit_super(sbi, folio, index, false); res = err ? "failed" : "done"; } f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)", @@ -3416,12 +3915,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, } static int sanity_check_raw_super(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main; block_t total_sections, blocks_per_seg; - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); size_t crc_offset = 0; __u32 crc = 0; @@ -3441,13 +3939,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } crc = le32_to_cpu(raw_super->crc); - if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + if (crc != f2fs_crc32(raw_super, crc_offset)) { f2fs_info(sbi, "Invalid SB checksum value: %u", crc); return -EFSCORRUPTED; } } - /* Currently, support only 4KB block size */ + /* only support block_size equals to PAGE_SIZE */ if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", le32_to_cpu(raw_super->log_blocksize), @@ -3462,7 +3960,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - /* Currently, support 512/1024/2048/4096 bytes sector size */ + /* Currently, support 512/1024/2048/4096/16K bytes sector size */ if (le32_to_cpu(raw_super->log_sectorsize) > F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < @@ -3579,9 +4077,23 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ - if (sanity_check_area_boundary(sbi, bh)) + if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; + /* + * Check for legacy summary layout on 16KB+ block devices. + * Modern f2fs-tools packs multiple 4KB summary areas into one block, + * whereas legacy versions used one block per summary, leading + * to a much larger SSA. + */ + if (SUMS_PER_BLOCK > 1 && + !(__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_PACKED_SSA))) { + f2fs_info(sbi, "Error: Device formatted with a legacy version. " + "Please reformat with a tool supporting the packed ssa " + "feature for block sizes larger than 4kb."); + return -EOPNOTSUPP; + } + return 0; } @@ -3600,6 +4112,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) block_t user_block_count, valid_user_blocks; block_t avail_node_count, valid_node_count; unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks; + unsigned int sit_blk_cnt; int i, j; total = le32_to_cpu(raw_super->segment_count); @@ -3650,7 +4163,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } main_segs = le32_to_cpu(raw_super->segment_count_main); - blocks_per_seg = sbi->blocks_per_seg; + blocks_per_seg = BLKS_PER_SEG(sbi); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || @@ -3711,6 +4224,13 @@ skip_cross: return 1; } + sit_blk_cnt = DIV_ROUND_UP(main_segs, SIT_ENTRY_PER_BLOCK); + if (sit_bitmap_size * 8 < sit_blk_cnt) { + f2fs_err(sbi, "Wrong bitmap size: sit: %u, sit_blk_cnt:%u", + sit_bitmap_size, sit_blk_cnt); + return 1; + } + cp_pack_start_sum = __start_sum_addr(sbi); cp_payload = __cp_payload(sbi); if (cp_pack_start_sum < cp_payload + 1 || @@ -3762,9 +4282,11 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); sbi->total_sections = le32_to_cpu(raw_super->section_count); - sbi->total_node_count = - (le32_to_cpu(raw_super->segment_count_nat) / 2) - * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->total_node_count = SEGS_TO_BLKS(sbi, + ((le32_to_cpu(raw_super->segment_count_nat) / 2) * + NAT_ENTRY_PER_BLOCK)); + sbi->allocate_section_hint = le32_to_cpu(raw_super->section_count); + sbi->allocate_section_policy = ALLOCATE_FORWARD_NOHINT; F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); @@ -3773,7 +4295,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; - sbi->migration_granularity = sbi->segs_per_sec; + sbi->migration_granularity = SEGS_PER_SEC(sbi); + sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ? + DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; @@ -3786,6 +4310,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; + sbi->interval_time[ENABLE_TIME] = DEF_ENABLE_INTERVAL; sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT; clear_sbi_flag(sbi, SBI_NEED_FSCK); @@ -3868,17 +4393,25 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; u64 zone_sectors; + unsigned int max_open_zones; int ret; if (!f2fs_sb_has_blkzoned(sbi)) return 0; - zone_sectors = bdev_zone_sectors(bdev); - if (!is_power_of_2(zone_sectors)) { - f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n"); - return -EINVAL; + if (bdev_is_zoned(FDEV(devi).bdev)) { + max_open_zones = bdev_max_open_zones(bdev); + if (max_open_zones && (max_open_zones < sbi->max_open_zones)) + sbi->max_open_zones = max_open_zones; + if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } } + zone_sectors = bdev_zone_sectors(bdev); if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != SECTOR_TO_BLOCK(zone_sectors)) return -EINVAL; @@ -3918,7 +4451,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, { struct super_block *sb = sbi->sb; int block; - struct buffer_head *bh; + struct folio *folio; struct f2fs_super_block *super; int err = 0; @@ -3927,32 +4460,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, return -ENOMEM; for (block = 0; block < 2; block++) { - bh = sb_bread(sb, block); - if (!bh) { + folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL); + if (IS_ERR(folio)) { f2fs_err(sbi, "Unable to read %dth superblock", block + 1); - err = -EIO; + err = PTR_ERR(folio); *recovery = 1; continue; } /* sanity checking of raw super */ - err = sanity_check_raw_super(sbi, bh); + err = sanity_check_raw_super(sbi, folio, block); if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - brelse(bh); + folio_put(folio); *recovery = 1; continue; } if (!*raw_super) { - memcpy(super, bh->b_data + F2FS_SUPER_OFFSET, + memcpy(super, F2FS_SUPER_BLOCK(folio, block), sizeof(*super)); *valid_super_block = block; *raw_super = super; } - brelse(bh); + folio_put(folio); } /* No valid superblock */ @@ -3966,7 +4499,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { - struct buffer_head *bh; + struct folio *folio; + pgoff_t index; __u32 crc = 0; int err; @@ -3978,28 +4512,30 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) /* we should update superblock crc here */ if (!recover && f2fs_sb_has_sb_chksum(sbi)) { - crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + crc = f2fs_crc32(F2FS_RAW_SUPER(sbi), offsetof(struct f2fs_super_block, crc)); F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); } /* write back-up superblock first */ - bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block ? 0 : 1; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; /* write current valid superblock */ - bh = sb_bread(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); return err; } @@ -4034,7 +4570,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) f2fs_up_write(&sbi->sb_lock); if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err); + f2fs_err_ratelimited(sbi, + "f2fs_commit_super fails to record stop_reason, err:%d", + err); } void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) @@ -4049,49 +4587,9 @@ void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) spin_unlock_irqrestore(&sbi->error_lock, flags); } -static bool f2fs_update_errors(struct f2fs_sb_info *sbi) -{ - unsigned long flags; - bool need_update = false; - - spin_lock_irqsave(&sbi->error_lock, flags); - if (sbi->error_dirty) { - memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors, - MAX_F2FS_ERRORS); - sbi->error_dirty = false; - need_update = true; - } - spin_unlock_irqrestore(&sbi->error_lock, flags); - - return need_update; -} - -static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error) -{ - int err; - - f2fs_down_write(&sbi->sb_lock); - - if (!f2fs_update_errors(sbi)) - goto out_unlock; - - err = f2fs_commit_super(sbi, false); - if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d", - error, err); -out_unlock: - f2fs_up_write(&sbi->sb_lock); -} - void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) { f2fs_save_errors(sbi, error); - f2fs_record_errors(sbi, error); -} - -void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error) -{ - f2fs_save_errors(sbi, error); if (!sbi->error_dirty) return; @@ -4106,8 +4604,7 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context) +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) { struct super_block *sb = sbi->sb; bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; @@ -4119,10 +4616,12 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (!f2fs_hw_is_readonly(sbi)) { save_stop_reason(sbi, reason); - if (irq_context && !shutdown) - schedule_work(&sbi->s_error_work); - else - f2fs_record_stop_reason(sbi); + /* + * always create an asynchronous task to record stop_reason + * in order to avoid potential deadlock when running into + * f2fs_record_stop_reason() synchronously. + */ + schedule_work(&sbi->s_error_work); } /* @@ -4138,18 +4637,28 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (shutdown) set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + else + dump_stack(); - /* continue filesystem operators if errors=continue */ - if (continue_fs || f2fs_readonly(sb)) + /* + * Continue filesystem operators if errors=continue. Should not set + * RO by shutdown, since RO bypasses thaw_super which can hang the + * system. + */ + if (continue_fs || f2fs_readonly(sb) || shutdown) { + f2fs_warn(sbi, "Stopped filesystem due to reason: %d", reason); return; + } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) @@ -4160,6 +4669,37 @@ static void f2fs_record_error_work(struct work_struct *work) f2fs_record_stop_reason(sbi); } +static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int zoneno, total_zones; + int devi; + + if (!f2fs_sb_has_blkzoned(sbi)) + return NULL_SEGNO; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (!bdev_is_zoned(FDEV(devi).bdev)) + continue; + + total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments); + + for (zoneno = 0; zoneno < total_zones; zoneno++) { + unsigned int segs, blks; + + if (!f2fs_zone_is_seq(sbi, devi, zoneno)) + continue; + + segs = GET_SEG_FROM_SEC(sbi, + zoneno * sbi->secs_per_zone); + blks = SEGS_TO_BLKS(sbi, segs); + return GET_SEGNO(sbi, FDEV(devi).start_blk + blks); + } + } +#endif + return NULL_SEGNO; +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -4188,18 +4728,28 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev); sbi->aligned_blksize = true; + sbi->bggc_io_aware = AWARE_ALL_IO; +#ifdef CONFIG_BLK_DEV_ZONED + sbi->max_open_zones = UINT_MAX; + sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ; + sbi->bggc_io_aware = AWARE_READ_IO; +#endif for (i = 0; i < max_devices; i++) { + if (max_devices == 1) { + FDEV(i).total_segments = + le32_to_cpu(raw_super->segment_count_main); + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).total_segments * + BLKS_PER_SEG(sbi); + } - if (i > 0 && !RDEV(i).path[0]) + if (i == 0) + FDEV(0).bdev_file = sbi->sb->s_bdev_file; + else if (!RDEV(i).path[0]) break; - if (max_devices == 1) { - /* Single zoned block device mount */ - FDEV(0).bdev = - blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode, - sbi->sb, NULL); - } else { + if (max_devices > 1) { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); FDEV(i).total_segments = @@ -4207,21 +4757,24 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (i == 0) { FDEV(i).start_blk = 0; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1 + - le32_to_cpu(raw_super->segment0_blkaddr); + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1 + + le32_to_cpu(raw_super->segment0_blkaddr); + sbi->allocate_section_hint = FDEV(i).total_segments / + SEGS_PER_SEC(sbi); } else { FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1; + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1; + FDEV(i).bdev_file = bdev_file_open_by_path( + FDEV(i).path, mode, sbi->sb, NULL); } - FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode, - sbi->sb, NULL); } - if (IS_ERR(FDEV(i).bdev)) - return PTR_ERR(FDEV(i).bdev); + if (IS_ERR(FDEV(i).bdev_file)) + return PTR_ERR(FDEV(i).bdev_file); + FDEV(i).bdev = file_bdev(FDEV(i).bdev_file); /* to release errored devices */ sbi->s_ndevs = i + 1; @@ -4229,24 +4782,21 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = false; #ifdef CONFIG_BLK_DEV_ZONED - if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi)) { - f2fs_err(sbi, "Zoned block device feature not enabled"); - return -EINVAL; - } - if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { + if (bdev_is_zoned(FDEV(i).bdev)) { + if (!f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Zoned block device feature not enabled"); + return -EINVAL; + } if (init_blkz_info(sbi, i)) { f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: Host-managed)", i, FDEV(i).path, FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + FDEV(i).start_blk, FDEV(i).end_blk); continue; } #endif @@ -4255,8 +4805,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).total_segments, FDEV(i).start_blk, FDEV(i).end_blk); } - f2fs_info(sbi, - "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -4322,14 +4870,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = true; } -static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) { + struct f2fs_fs_context *ctx = fc->fs_private; struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; struct inode *root; int err; bool skip_recovery = false, need_fsck = false; - char *options = NULL; int recovery, i, valid_super_block; struct curseg_info *seg_i; int retry_cnt = 1; @@ -4358,6 +4906,7 @@ try_onemore: init_f2fs_rwsem(&sbi->node_change); spin_lock_init(&sbi->stat_lock); init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->cp_enable_rwsem); init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); spin_lock_init(&sbi->error_lock); @@ -4368,15 +4917,6 @@ try_onemore: } mutex_init(&sbi->flush_lock); - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_err(sbi, "Cannot load crc32 driver."); - err = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto free_sbi; - } - /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_err(sbi, "unable to set blocksize"); @@ -4397,18 +4937,18 @@ try_onemore: /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sbi)) - sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, - sizeof(raw_super->uuid)); + sbi->s_chksum_seed = f2fs_chksum(~0, raw_super->uuid, + sizeof(raw_super->uuid)); default_options(sbi, false); - /* parse mount options */ - options = kstrdup((const char *)data, GFP_KERNEL); - if (data && !options) { - err = -ENOMEM; + + err = f2fs_check_opt_consistency(fc, sb); + if (err) goto free_sb_buf; - } - err = parse_options(sb, options, false); + f2fs_apply_options(fc, sb); + + err = f2fs_sanity_check_options(sbi, false); if (err) goto free_options; @@ -4446,7 +4986,16 @@ try_onemore: sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); - memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + if (test_opt(sbi, INLINECRYPT)) + sb->s_flags |= SB_INLINECRYPT; + + if (test_opt(sbi, LAZYTIME)) + sb->s_flags |= SB_LAZYTIME; + else + sb->s_flags &= ~SB_LAZYTIME; + + super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid)); + super_set_sysfs_name_bdev(sb); sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ @@ -4469,22 +5018,9 @@ try_onemore: if (err) goto free_iostat; - if (F2FS_IO_ALIGNED(sbi)) { - sbi->write_io_dummy = - mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); - if (!sbi->write_io_dummy) { - err = -ENOMEM; - goto free_percpu; - } - } - - /* init per sbi slab cache */ - err = f2fs_init_xattr_caches(sbi); - if (err) - goto free_io_dummy; err = f2fs_init_page_array_cache(sbi); if (err) - goto free_xattr_cache; + goto free_percpu; /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); @@ -4569,13 +5105,16 @@ try_onemore: goto free_nm; } - err = adjust_reserved_segment(sbi); - if (err) - goto free_nm; - /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); + /* get segno of first zoned block device */ + sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); + + sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ? + ZONED_PIN_SEC_REQUIRED_COUNT : + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) @@ -4610,6 +5149,7 @@ try_onemore: goto free_node_inode; } + generic_set_sb_d_ops(sb); sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { err = -ENOMEM; @@ -4624,6 +5164,7 @@ try_onemore: if (err) goto free_compress_inode; + sbi->umount_lock_holder = current; #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { @@ -4639,8 +5180,10 @@ try_onemore: if (err) goto free_meta; - if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) { + skip_recovery = true; goto reset_checkpoint; + } /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && @@ -4682,41 +5225,47 @@ try_onemore: } } else { err = f2fs_recover_fsync_data(sbi, true); - - if (!f2fs_readonly(sb) && err > 0) { - err = -EINVAL; - f2fs_err(sbi, "Need to recover fsync data"); - goto free_meta; + if (err > 0) { + if (!f2fs_readonly(sb)) { + f2fs_err(sbi, "Need to recover fsync data"); + err = -EINVAL; + goto free_meta; + } else { + f2fs_info(sbi, "drop all fsynced data"); + err = 0; + } } } +reset_checkpoint: #ifdef CONFIG_QUOTA f2fs_recover_quota_end(sbi, quota_enabled); #endif - /* * If the f2fs is not readonly and fsync data recovery succeeds, - * check zoned block devices' write pointer consistency. + * write pointer consistency of cursegs and other zones are already + * checked and fixed during recovery. However, if recovery fails, + * write pointers are left untouched, and retry-mount should check + * them here. */ - if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_check_write_pointer(sbi); - if (err) - goto free_meta; - } - -reset_checkpoint: - f2fs_init_inmem_curseg(sbi); + if (skip_recovery) + err = f2fs_check_and_fix_write_pointer(sbi); + if (err) + goto free_meta; /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); - if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_init_inmem_curseg(sbi); + if (err) + goto sync_free_meta; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) err = f2fs_disable_checkpoint(sbi); - if (err) - goto sync_free_meta; - } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { - f2fs_enable_checkpoint(sbi); - } + else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) + err = f2fs_enable_checkpoint(sbi); + if (err) + goto sync_free_meta; /* * If filesystem is not mounted as read-only then @@ -4729,7 +5278,6 @@ reset_checkpoint: if (err) goto sync_free_meta; } - kvfree(options); /* recover broken superblock */ if (recovery) { @@ -4747,6 +5295,8 @@ reset_checkpoint: f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + + sbi->umount_lock_holder = NULL; return 0; sync_free_meta: @@ -4802,17 +5352,13 @@ free_meta_inode: sbi->meta_inode = NULL; free_page_array_cache: f2fs_destroy_page_array_cache(sbi); -free_xattr_cache: - f2fs_destroy_xattr_caches(sbi); -free_io_dummy: - mempool_destroy(sbi->write_io_dummy); free_percpu: destroy_percpu_info(sbi); free_iostat: f2fs_destroy_iostat(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kvfree(sbi->write_io[i]); + kfree(sbi->write_io[i]); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -4823,14 +5369,13 @@ free_options: for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); - kvfree(options); + /* no need to free dummy_enc_policy, we just keep it in ctx when failed */ + swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy); free_sb_buf: kfree(raw_super); free_sbi: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); + sb->s_fs_info = NULL; /* give only one another chance */ if (retry_cnt > 0 && skip_recovery) { @@ -4841,16 +5386,45 @@ free_sbi: return err; } -static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int f2fs_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); + return get_tree_bdev(fc, f2fs_fill_super); +} + +static int f2fs_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + + return __f2fs_remount(fc, sb); +} + +static void f2fs_fc_free(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx = fc->fs_private; + + if (!ctx) + return; + +#ifdef CONFIG_QUOTA + f2fs_unnote_qf_name_all(fc); +#endif + fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy); + kfree(ctx); } +static const struct fs_context_operations f2fs_context_ops = { + .parse_param = f2fs_parse_param, + .get_tree = f2fs_get_tree, + .reconfigure = f2fs_reconfigure, + .free = f2fs_fc_free, +}; + static void kill_f2fs_super(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + if (sb->s_root) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); + sbi->umount_lock_holder = current; set_sbi_flag(sbi, SBI_IS_CLOSE); f2fs_stop_gc_thread(sbi); @@ -4870,6 +5444,7 @@ static void kill_f2fs_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); f2fs_write_checkpoint(sbi, &cpc); } @@ -4877,12 +5452,32 @@ static void kill_f2fs_super(struct super_block *sb) sb->s_flags &= ~SB_RDONLY; } kill_block_super(sb); + /* Release block devices last, after fscrypt_destroy_keyring(). */ + if (sbi) { + destroy_device_list(sbi); + kfree(sbi); + sb->s_fs_info = NULL; + } +} + +static int f2fs_init_fs_context(struct fs_context *fc) +{ + struct f2fs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + fc->fs_private = ctx; + fc->ops = &f2fs_context_ops; + + return 0; } static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", - .mount = f2fs_mount, + .init_fs_context = f2fs_init_fs_context, .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; @@ -4910,12 +5505,6 @@ static int __init init_f2fs_fs(void) { int err; - if (PAGE_SIZE != F2FS_BLKSIZE) { - printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n", - PAGE_SIZE, F2FS_BLKSIZE); - return -EINVAL; - } - err = init_inodecache(); if (err) goto fail; @@ -4940,12 +5529,9 @@ static int __init init_f2fs_fs(void) err = f2fs_init_sysfs(); if (err) goto free_garbage_collection_cache; - err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker"); + err = f2fs_init_shrinker(); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -4968,7 +5554,17 @@ static int __init init_f2fs_fs(void) err = f2fs_create_casefold_cache(); if (err) goto free_compress_cache; + err = f2fs_init_xattr_cache(); + if (err) + goto free_casefold_cache; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_xattr_cache; return 0; +free_xattr_cache: + f2fs_destroy_xattr_cache(); +free_casefold_cache: + f2fs_destroy_casefold_cache(); free_compress_cache: f2fs_destroy_compress_cache(); free_compress_mempool: @@ -4983,9 +5579,7 @@ free_post_read: f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); free_garbage_collection_cache: @@ -5008,6 +5602,8 @@ fail: static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); + f2fs_destroy_xattr_cache(); f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); @@ -5016,8 +5612,7 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); - unregister_shrinker(&f2fs_shrinker_info); + f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); f2fs_destroy_extent_cache(); @@ -5034,5 +5629,3 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32"); - diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 48b7e0073884..c42f4f979d13 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -61,6 +61,12 @@ struct f2fs_attr { int id; }; +struct f2fs_base_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_base_attr *a, char *buf); + ssize_t (*store)(struct f2fs_base_attr *a, const char *buf, size_t len); +}; + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf); @@ -143,6 +149,39 @@ static ssize_t pending_discard_show(struct f2fs_attr *a, &SM_I(sbi)->dcc_info->discard_cmd_cnt)); } +static ssize_t issued_discard_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + if (!SM_I(sbi)->dcc_info) + return -EINVAL; + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read( + &SM_I(sbi)->dcc_info->issued_discard)); +} + +static ssize_t queued_discard_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + if (!SM_I(sbi)->dcc_info) + return -EINVAL; + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read( + &SM_I(sbi)->dcc_info->queued_discard)); +} + +static ssize_t undiscard_blks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + if (!SM_I(sbi)->dcc_info) + return -EINVAL; + return sysfs_emit(buf, "%u\n", + SM_I(sbi)->dcc_info->undiscard_blks); +} + +static ssize_t atgc_enabled_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0); +} + static ssize_t gc_mode_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -155,50 +194,53 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (f2fs_sb_has_encrypt(sbi)) - len += scnprintf(buf, PAGE_SIZE - len, "%s", + len += sysfs_emit_at(buf, len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_readonly(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + if (f2fs_sb_has_packed_ssa(sbi)) + len += sysfs_emit_at(buf, len, "%s%s", + len ? ", " : "", "packed_ssa"); + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -235,6 +277,29 @@ static ssize_t encoding_show(struct f2fs_attr *a, return sysfs_emit(buf, "(none)\n"); } +static ssize_t encoding_flags_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%x\n", + le16_to_cpu(F2FS_RAW_SUPER(sbi)->s_encoding_flags)); +} + +static ssize_t effective_lookup_mode_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + switch (F2FS_OPTION(sbi).lookup_mode) { + case LOOKUP_PERF: + return sysfs_emit(buf, "perf\n"); + case LOOKUP_COMPAT: + return sysfs_emit(buf, "compat\n"); + case LOOKUP_AUTO: + if (sb_no_casefold_compat_fallback(sbi->sb)) + return sysfs_emit(buf, "auto:perf\n"); + return sysfs_emit(buf, "auto:compat\n"); + } + return 0; +} + static ssize_t mounted_time_sec_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -296,30 +361,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += scnprintf(buf + len, PAGE_SIZE - len, - "cold file extension:\n"); + len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); - len += scnprintf(buf + len, PAGE_SIZE - len, - "hot file extension:\n"); + len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + return len; } if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) { struct ckpt_req_control *cprc = &sbi->cprc_info; int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio); - int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio); + int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio); if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE) return -EINVAL; return sysfs_emit(buf, "%s,%d\n", - class == IOPRIO_CLASS_RT ? "rt" : "be", data); + class == IOPRIO_CLASS_RT ? "rt" : "be", level); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -356,6 +418,16 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "revoked_atomic_block")) return sysfs_emit(buf, "%llu\n", sbi->revoked_atomic_block); +#ifdef CONFIG_F2FS_STAT_FS + if (!strcmp(a->attr.name, "cp_foreground_calls")) + return sysfs_emit(buf, "%d\n", + atomic_read(&sbi->cp_call_count[TOTAL_CALL]) - + atomic_read(&sbi->cp_call_count[BACKGROUND])); + if (!strcmp(a->attr.name, "cp_background_calls")) + return sysfs_emit(buf, "%d\n", + atomic_read(&sbi->cp_call_count[BACKGROUND])); +#endif + ui = (unsigned int *)(ptr + a->offset); return sysfs_emit(buf, "%u\n", *ui); @@ -413,7 +485,7 @@ out: const char *name = strim((char *)buf); struct ckpt_req_control *cprc = &sbi->cprc_info; int class; - long data; + long level; int ret; if (!strncmp(name, "rt,", 3)) @@ -424,13 +496,13 @@ out: return -EINVAL; name += 3; - ret = kstrtol(name, 10, &data); + ret = kstrtol(name, 10, &level); if (ret) return ret; - if (data >= IOPRIO_NR_LEVELS || data < 0) + if (level >= IOPRIO_NR_LEVELS || level < 0) return -EINVAL; - cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data); + cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level); if (test_opt(sbi, MERGE_CHECKPOINT)) { ret = set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); @@ -447,17 +519,21 @@ out: if (ret < 0) return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX)) - return -EINVAL; - if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) - return -EINVAL; + if (a->struct_type == FAULT_INFO_TYPE) { + if (f2fs_build_fault_attr(sbi, 0, t, FAULT_TYPE)) + return -EINVAL; + return count; + } + if (a->struct_type == FAULT_INFO_RATE) { + if (f2fs_build_fault_attr(sbi, t, 0, FAULT_RATE)) + return -EINVAL; + return count; + } #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks - - sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -506,8 +582,20 @@ out: return count; } + if (!strcmp(a->attr.name, "discard_io_aware")) { + if (t >= DPOLICY_IO_AWARE_MAX) + return -EINVAL; + *ui = t; + return count; + } + if (!strcmp(a->attr.name, "migration_granularity")) { - if (t == 0 || t > sbi->segs_per_sec) + if (t == 0 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + } + + if (!strcmp(a->attr.name, "migration_window_granularity")) { + if (t == 0 || t > SEGS_PER_SEC(sbi)) return -EINVAL; } @@ -559,6 +647,27 @@ out: return count; } + if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) { + if (t > 100) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; @@ -577,6 +686,15 @@ out: } #endif +#ifdef CONFIG_BLK_DEV_ZONED + if (!strcmp(a->attr.name, "blkzone_alloc_policy")) { + if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV) + return -EINVAL; + sbi->blkzone_alloc_policy = t; + return count; + } +#endif + #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || !strcmp(a->attr.name, "compr_saved_block")) { @@ -631,6 +749,13 @@ out: return count; } + if (!strcmp(a->attr.name, "gc_pin_file_threshold")) { + if (t > MAX_GC_FAILED_PINNED_FILES) + return -EINVAL; + sbi->gc_pin_file_threshold = t; + return count; + } + if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { if (t != 0) return -EINVAL; @@ -715,15 +840,72 @@ out: return count; } + if (!strcmp(a->attr.name, "max_read_extent_count")) { + if (t > UINT_MAX) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; - if (t && f2fs_lfs_mode(sbi)) + /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */ + if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE))) return -EINVAL; SM_I(sbi)->ipu_policy = (unsigned int)t; return count; } + if (!strcmp(a->attr.name, "dir_level")) { + if (t > MAX_DIR_HASH_DEPTH) + return -EINVAL; + sbi->dir_level = t; + return count; + } + + if (!strcmp(a->attr.name, "reserved_pin_section")) { + if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi))) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) { + if (t < 1 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + sbi->gc_thread->boost_gc_multiple = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) { + if (t > GC_GREEDY) + return -EINVAL; + sbi->gc_thread->boost_gc_greedy = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "bggc_io_aware")) { + if (t < AWARE_ALL_IO || t > AWARE_NONE) + return -EINVAL; + sbi->bggc_io_aware = t; + return count; + } + + if (!strcmp(a->attr.name, "allocate_section_hint")) { + if (t < 0 || t > MAIN_SECS(sbi)) + return -EINVAL; + sbi->allocate_section_hint = t; + return count; + } + + if (!strcmp(a->attr.name, "allocate_section_policy")) { + if (t < ALLOCATE_FORWARD_NOHINT || t > ALLOCATE_FORWARD_FROM_HINT) + return -EINVAL; + sbi->allocate_section_policy = t; + return count; + } + *ui = (unsigned int)t; return count; @@ -775,6 +957,25 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +static ssize_t f2fs_base_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_base_attr *a = container_of(attr, + struct f2fs_base_attr, attr); + + return a->show ? a->show(a, buf) : 0; +} + +static ssize_t f2fs_base_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_base_attr *a = container_of(attr, + struct f2fs_base_attr, attr); + + return a->store ? a->store(a, buf, len) : 0; +} + /* * Note that there are three feature list entries: * 1) /sys/fs/f2fs/features @@ -793,18 +994,50 @@ static void f2fs_sb_release(struct kobject *kobj) * please add new on-disk feature in this list only. * - ref. F2FS_SB_FEATURE_RO_ATTR() */ -static ssize_t f2fs_feature_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) +static ssize_t f2fs_feature_show(struct f2fs_base_attr *a, char *buf) { return sysfs_emit(buf, "supported\n"); } #define F2FS_FEATURE_RO_ATTR(_name) \ -static struct f2fs_attr f2fs_attr_##_name = { \ +static struct f2fs_base_attr f2fs_base_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = 0444 }, \ .show = f2fs_feature_show, \ } +static ssize_t f2fs_tune_show(struct f2fs_base_attr *a, char *buf) +{ + unsigned int res = 0; + + if (!strcmp(a->attr.name, "reclaim_caches_kb")) + res = f2fs_donate_files(); + + return sysfs_emit(buf, "%u\n", res); +} + +static ssize_t f2fs_tune_store(struct f2fs_base_attr *a, + const char *buf, size_t count) +{ + unsigned long t; + int ret; + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + + if (!strcmp(a->attr.name, "reclaim_caches_kb")) + f2fs_reclaim_caches(t); + + return count; +} + +#define F2FS_TUNE_RW_ATTR(_name) \ +static struct f2fs_base_attr f2fs_base_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0644 }, \ + .show = f2fs_tune_show, \ + .store = f2fs_tune_store, \ +} + static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -896,6 +1129,11 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); +GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple); +GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -905,6 +1143,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); +SM_INFO_GENERAL_RW_ATTR(reserved_segments); /* DCC_INFO ATTR */ DCC_INFO_RW_ATTR(max_small_discards, max_discards); @@ -916,6 +1155,7 @@ DCC_INFO_GENERAL_RW_ATTR(discard_io_aware_gran); DCC_INFO_GENERAL_RW_ATTR(discard_urgent_util); DCC_INFO_GENERAL_RW_ATTR(discard_granularity); DCC_INFO_GENERAL_RW_ATTR(max_ordered_discard); +DCC_INFO_GENERAL_RW_ATTR(discard_io_aware); /* NM_INFO ATTR */ NM_INFO_RW_ATTR(max_roll_forward_node_blocks, max_rf_node_blocks); @@ -936,7 +1176,10 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity); F2FS_SBI_GENERAL_RW_ATTR(dir_level); +F2FS_SBI_GENERAL_RW_ATTR(allocate_section_hint); +F2FS_SBI_GENERAL_RW_ATTR(allocate_section_policy); #ifdef CONFIG_F2FS_IOSTAT F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms); @@ -966,16 +1209,23 @@ F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block); F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); +/* read extent cache */ +F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RO_ATTR(max_open_zones); +F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif +F2FS_SBI_GENERAL_RW_ATTR(carve_out); +F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section); +F2FS_SBI_GENERAL_RW_ATTR(bggc_io_aware); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS -STAT_INFO_RO_ATTR(cp_foreground_calls, cp_count); -STAT_INFO_RO_ATTR(cp_background_calls, bg_cp_count); -STAT_INFO_RO_ATTR(gc_foreground_calls, call_count); -STAT_INFO_RO_ATTR(gc_background_calls, bg_gc); +STAT_INFO_RO_ATTR(cp_foreground_calls, cp_call_count[FOREGROUND]); +STAT_INFO_RO_ATTR(cp_background_calls, cp_call_count[BACKGROUND]); +STAT_INFO_RO_ATTR(gc_foreground_calls, gc_call_count[FOREGROUND]); +STAT_INFO_RO_ATTR(gc_background_calls, gc_call_count[BACKGROUND]); #endif /* FAULT_INFO ATTR */ @@ -1004,9 +1254,12 @@ F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +F2FS_GENERAL_RO_ATTR(encoding_flags); +F2FS_GENERAL_RO_ATTR(effective_lookup_mode); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(atgc_enabled); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_GENERAL_RO_ATTR(moved_blocks_background); @@ -1044,6 +1297,10 @@ F2FS_FEATURE_RO_ATTR(readonly); F2FS_FEATURE_RO_ATTR(compression); #endif F2FS_FEATURE_RO_ATTR(pin_file); +#ifdef CONFIG_UNICODE +F2FS_FEATURE_RO_ATTR(linear_lookup); +#endif +F2FS_FEATURE_RO_ATTR(packed_ssa); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1051,6 +1308,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_no_zoned_gc_percent), + ATTR_LIST(gc_boost_zoned_gc_percent), + ATTR_LIST(gc_valid_thresh_ratio), + ATTR_LIST(gc_boost_gc_multiple), + ATTR_LIST(gc_boost_gc_greedy), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), @@ -1064,6 +1326,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(discard_urgent_util), ATTR_LIST(discard_granularity), ATTR_LIST(max_ordered_discard), + ATTR_LIST(discard_io_aware), ATTR_LIST(pending_discard), ATTR_LIST(gc_mode), ATTR_LIST(ipu_policy), @@ -1072,8 +1335,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), + ATTR_LIST(reserved_segments), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), + ATTR_LIST(migration_window_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), @@ -1084,6 +1349,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(discard_idle_interval), ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), + ATTR_LIST(bggc_io_aware), #ifdef CONFIG_F2FS_IOSTAT ATTR_LIST(iostat_enable), ATTR_LIST(iostat_period_ms), @@ -1109,6 +1375,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), + ATTR_LIST(encoding_flags), + ATTR_LIST(effective_lookup_mode), ATTR_LIST(mounted_time_sec), #ifdef CONFIG_F2FS_STAT_FS ATTR_LIST(cp_foreground_calls), @@ -1121,6 +1389,8 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(max_open_zones), + ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), @@ -1134,6 +1404,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_candidate_count), ATTR_LIST(atgc_age_weight), ATTR_LIST(atgc_age_threshold), + ATTR_LIST(atgc_enabled), ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), @@ -1146,50 +1417,67 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(hot_data_age_threshold), ATTR_LIST(warm_data_age_threshold), ATTR_LIST(last_age_weight), + ATTR_LIST(max_read_extent_count), + ATTR_LIST(carve_out), + ATTR_LIST(reserved_pin_section), + ATTR_LIST(allocate_section_hint), + ATTR_LIST(allocate_section_policy), NULL, }; ATTRIBUTE_GROUPS(f2fs); +#define BASE_ATTR_LIST(name) (&f2fs_base_attr_##name.attr) static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION - ATTR_LIST(encryption), - ATTR_LIST(test_dummy_encryption_v2), + BASE_ATTR_LIST(encryption), + BASE_ATTR_LIST(test_dummy_encryption_v2), #if IS_ENABLED(CONFIG_UNICODE) - ATTR_LIST(encrypted_casefold), + BASE_ATTR_LIST(encrypted_casefold), #endif #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED - ATTR_LIST(block_zoned), + BASE_ATTR_LIST(block_zoned), #endif - ATTR_LIST(atomic_write), - ATTR_LIST(extra_attr), - ATTR_LIST(project_quota), - ATTR_LIST(inode_checksum), - ATTR_LIST(flexible_inline_xattr), - ATTR_LIST(quota_ino), - ATTR_LIST(inode_crtime), - ATTR_LIST(lost_found), + BASE_ATTR_LIST(atomic_write), + BASE_ATTR_LIST(extra_attr), + BASE_ATTR_LIST(project_quota), + BASE_ATTR_LIST(inode_checksum), + BASE_ATTR_LIST(flexible_inline_xattr), + BASE_ATTR_LIST(quota_ino), + BASE_ATTR_LIST(inode_crtime), + BASE_ATTR_LIST(lost_found), #ifdef CONFIG_FS_VERITY - ATTR_LIST(verity), + BASE_ATTR_LIST(verity), #endif - ATTR_LIST(sb_checksum), + BASE_ATTR_LIST(sb_checksum), #if IS_ENABLED(CONFIG_UNICODE) - ATTR_LIST(casefold), + BASE_ATTR_LIST(casefold), #endif - ATTR_LIST(readonly), + BASE_ATTR_LIST(readonly), #ifdef CONFIG_F2FS_FS_COMPRESSION - ATTR_LIST(compression), + BASE_ATTR_LIST(compression), +#endif + BASE_ATTR_LIST(pin_file), +#ifdef CONFIG_UNICODE + BASE_ATTR_LIST(linear_lookup), #endif - ATTR_LIST(pin_file), + BASE_ATTR_LIST(packed_ssa), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); F2FS_GENERAL_RO_ATTR(sb_status); F2FS_GENERAL_RO_ATTR(cp_status); +F2FS_GENERAL_RO_ATTR(issued_discard); +F2FS_GENERAL_RO_ATTR(queued_discard); +F2FS_GENERAL_RO_ATTR(undiscard_blks); + static struct attribute *f2fs_stat_attrs[] = { ATTR_LIST(sb_status), ATTR_LIST(cp_status), + ATTR_LIST(issued_discard), + ATTR_LIST(queued_discard), + ATTR_LIST(undiscard_blks), NULL, }; ATTRIBUTE_GROUPS(f2fs_stat); @@ -1208,6 +1496,8 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); +F2FS_SB_FEATURE_RO_ATTR(packed_ssa, PACKED_SSA); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1224,10 +1514,20 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_casefold), ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), + ATTR_LIST(sb_device_alias), + ATTR_LIST(sb_packed_ssa), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); +F2FS_TUNE_RW_ATTR(reclaim_caches_kb); + +static struct attribute *f2fs_tune_attrs[] = { + BASE_ATTR_LIST(reclaim_caches_kb), + NULL, +}; +ATTRIBUTE_GROUPS(f2fs_tune); + static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, .store = f2fs_attr_store, @@ -1247,15 +1547,34 @@ static struct kset f2fs_kset = { .kobj = {.ktype = &f2fs_ktype}, }; +static const struct sysfs_ops f2fs_feat_attr_ops = { + .show = f2fs_base_attr_show, + .store = f2fs_base_attr_store, +}; + static const struct kobj_type f2fs_feat_ktype = { .default_groups = f2fs_feat_groups, - .sysfs_ops = &f2fs_attr_ops, + .sysfs_ops = &f2fs_feat_attr_ops, }; static struct kobject f2fs_feat = { .kset = &f2fs_kset, }; +static const struct sysfs_ops f2fs_tune_attr_ops = { + .show = f2fs_base_attr_show, + .store = f2fs_base_attr_store, +}; + +static const struct kobj_type f2fs_tune_ktype = { + .default_groups = f2fs_tune_groups, + .sysfs_ops = &f2fs_tune_attr_ops, +}; + +static struct kobject f2fs_tune = { + .kset = &f2fs_kset, +}; + static ssize_t f2fs_stat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1357,7 +1676,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, le32_to_cpu(sbi->raw_super->segment_count_main); int i, j; - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps|mtime\n" "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); for (i = 0; i < total_segs; i++) { @@ -1367,6 +1686,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_printf(seq, "| %llx", se->mtime); seq_putc(seq, '\n'); } return 0; @@ -1432,6 +1752,134 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused disk_map_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + seq_printf(seq, "Address Layout : %5luB Block address (# of Segments)\n", + F2FS_BLKSIZE); + seq_printf(seq, " SB : %12s\n", "0/1024B"); + seq_printf(seq, " seg0_blkaddr : 0x%010x\n", SEG0_BLKADDR(sbi)); + seq_printf(seq, " Checkpoint : 0x%010x (%10d)\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2); + seq_printf(seq, " SIT : 0x%010x (%10d)\n", + SIT_I(sbi)->sit_base_addr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit)); + seq_printf(seq, " NAT : 0x%010x (%10d)\n", + NM_I(sbi)->nat_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat)); + seq_printf(seq, " SSA : 0x%010x (%10d)\n", + SM_I(sbi)->ssa_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa)); + seq_printf(seq, " Main : 0x%010x (%10d)\n", + SM_I(sbi)->main_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main)); + seq_printf(seq, " Block size : %12lu KB\n", F2FS_BLKSIZE >> 10); + seq_printf(seq, " Segment size : %12d MB\n", + (BLKS_PER_SEG(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10); + seq_printf(seq, " Segs/Sections : %12d\n", + SEGS_PER_SEC(sbi)); + seq_printf(seq, " Section size : %12d MB\n", + (BLKS_PER_SEC(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10); + seq_printf(seq, " # of Sections : %12d\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count)); + + if (!f2fs_is_multi_device(sbi)) + return 0; + + seq_puts(seq, "\nDisk Map for multi devices:\n"); + for (i = 0; i < sbi->s_ndevs; i++) + seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n", + i, bdev_is_zoned(FDEV(i).bdev), + FDEV(i).start_blk, FDEV(i).end_blk, + FDEV(i).path); + return 0; +} + +static int __maybe_unused donation_list_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + struct f2fs_inode_info *fi; + struct dentry *dentry; + char *buf, *path; + int i; + + buf = f2fs_getname(sbi); + if (!buf) + return 0; + + seq_printf(seq, "Donation List\n"); + seq_printf(seq, " # of files : %u\n", sbi->donate_files); + seq_printf(seq, " %-50s %10s %20s %20s %22s\n", + "File path", "Status", "Donation offset (kb)", + "Donation size (kb)", "File cached size (kb)"); + seq_printf(seq, "---\n"); + + for (i = 0; i < sbi->donate_files; i++) { + spin_lock(&sbi->inode_lock[DONATE_INODE]); + if (list_empty(&sbi->inode_list[DONATE_INODE])) { + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + break; + } + fi = list_first_entry(&sbi->inode_list[DONATE_INODE], + struct f2fs_inode_info, gdonate_list); + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + + if (!inode) + continue; + + inode_lock_shared(inode); + + dentry = d_find_alias(inode); + if (!dentry) { + path = NULL; + } else { + path = dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR(path)) + goto next; + } + seq_printf(seq, " %-50s %10s %20llu %20llu %22llu\n", + path ? path : "<unlinked>", + is_inode_flag_set(inode, FI_DONATE_FINISHED) ? + "Evicted" : "Donated", + (loff_t)fi->donate_start << (PAGE_SHIFT - 10), + (loff_t)(fi->donate_end + 1) << (PAGE_SHIFT - 10), + (loff_t)inode->i_mapping->nrpages << (PAGE_SHIFT - 10)); +next: + dput(dentry); + inode_unlock_shared(inode); + iput(inode); + } + f2fs_putname(buf); + return 0; +} + +#ifdef CONFIG_F2FS_FAULT_INJECTION +static int __maybe_unused inject_stats_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; + int i; + + seq_puts(seq, "fault_type injected_count\n"); + + for (i = 0; i < FAULT_MAX; i++) + seq_printf(seq, "%-24s%-10u\n", f2fs_fault_name[i], + ffi->inject_count[i]); + return 0; +} +#endif + int __init f2fs_init_sysfs(void) { int ret; @@ -1447,6 +1895,11 @@ int __init f2fs_init_sysfs(void) if (ret) goto put_kobject; + ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, + NULL, "tuning"); + if (ret) + goto put_kobject; + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); if (!f2fs_proc_root) { ret = -ENOMEM; @@ -1454,7 +1907,9 @@ int __init f2fs_init_sysfs(void) } return 0; + put_kobject: + kobject_put(&f2fs_tune); kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); return ret; @@ -1462,6 +1917,7 @@ put_kobject: void f2fs_exit_sysfs(void) { + kobject_put(&f2fs_tune); kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); remove_proc_entry("fs/f2fs", NULL); @@ -1513,6 +1969,14 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) victim_bits_seq_show, sb); proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, discard_plist_seq_show, sb); + proc_create_single_data("disk_map", 0444, sbi->s_proc, + disk_map_seq_show, sb); + proc_create_single_data("donation_list", 0444, sbi->s_proc, + donation_list_seq_show, sb); +#ifdef CONFIG_F2FS_FAULT_INJECTION + proc_create_single_data("inject_stats", 0444, sbi->s_proc, + inject_stats_seq_show, sb); +#endif return 0; put_feature_list_kobj: kobject_put(&sbi->s_feature_list_kobj); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 4fc95f353a7a..05b935b55216 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -74,23 +74,23 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; - if (pos + count > inode->i_sb->s_maxbytes) + if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode))) return -EFBIG; while (count) { size_t n = min_t(size_t, count, PAGE_SIZE - offset_in_page(pos)); - struct page *page; + struct folio *folio; void *fsdata = NULL; int res; - res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata); + res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata); if (res) return res; - memcpy_to_page(page, offset_in_page(pos), buf, n); + memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n); - res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata); + res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata); if (res < 0) return res; if (res != n) @@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, pos = le64_to_cpu(dloc.pos); /* Get the descriptor */ - if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + if (pos + size < pos || + pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), @@ -258,21 +259,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct page *page; + struct folio *folio; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); - if (!page || !PageUptodate(page)) { + folio = f2fs_filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - if (page) - put_page(page); + if (!IS_ERR(folio)) + folio_put(folio); else if (num_ra_pages > 1) page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - page = read_mapping_page(inode->i_mapping, index, NULL); + folio = read_mapping_folio(inode->i_mapping, index, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - return page; + return folio_file_page(folio, index); } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, @@ -284,6 +287,8 @@ static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, } const struct fsverity_operations f2fs_verityops = { + .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_verity_info) - + (int)offsetof(struct f2fs_inode_info, vfs_inode), .begin_enable_verity = f2fs_begin_enable_verity, .end_enable_verity = f2fs_end_enable_verity, .get_verity_descriptor = f2fs_get_verity_descriptor, diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4ae93e1df421..b4e5c406632f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -23,11 +23,12 @@ #include "xattr.h" #include "segment.h" +static struct kmem_cache *inline_xattr_slab; static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) { - if (likely(size == sbi->inline_xattr_slab_size)) { + if (likely(size == DEFAULT_XATTR_SLAB_SIZE)) { *is_inline = true; - return f2fs_kmem_cache_alloc(sbi->inline_xattr_slab, + return f2fs_kmem_cache_alloc(inline_xattr_slab, GFP_F2FS_ZERO, false, sbi); } *is_inline = false; @@ -38,7 +39,7 @@ static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, bool is_inline) { if (is_inline) - kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + kmem_cache_free(inline_xattr_slab, xattr_addr); else kfree(xattr_addr); } @@ -136,7 +137,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, #ifdef CONFIG_F2FS_FS_SECURITY static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *page) + void *folio) { const struct xattr *xattr; int err = 0; @@ -144,7 +145,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, for (xattr = xattr_array; xattr->name != NULL; xattr++) { err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, - xattr->value_len, (struct page *)page, 0); + xattr->value_len, folio, 0); if (err < 0) break; } @@ -152,10 +153,10 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, } int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) + const struct qstr *qstr, struct folio *ifolio) { return security_inode_init_security(inode, dir, qstr, - &f2fs_initxattrs, ipage); + f2fs_initxattrs, ifolio); } #endif @@ -189,7 +190,7 @@ const struct xattr_handler f2fs_xattr_security_handler = { .set = f2fs_xattr_generic_set, }; -static const struct xattr_handler *f2fs_xattr_handler_map[] = { +static const struct xattr_handler * const f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, @@ -202,7 +203,7 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, }; -const struct xattr_handler *f2fs_xattr_handlers[] = { +const struct xattr_handler * const f2fs_xattr_handlers[] = { &f2fs_xattr_user_handler, &f2fs_xattr_trusted_handler, #ifdef CONFIG_F2FS_FS_SECURITY @@ -271,25 +272,25 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, return entry; } -static int read_inline_xattr(struct inode *inode, struct page *ipage, +static int read_inline_xattr(struct inode *inode, struct folio *ifolio, void *txattr_addr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int inline_size = inline_xattr_size(inode); - struct page *page = NULL; + struct folio *folio = NULL; void *inline_addr; - if (ipage) { - inline_addr = inline_xattr_addr(inode, ipage); + if (ifolio) { + inline_addr = inline_xattr_addr(inode, ifolio); } else { - page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(folio)) + return PTR_ERR(folio); - inline_addr = inline_xattr_addr(inode, page); + inline_addr = inline_xattr_addr(inode, folio); } memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } @@ -299,22 +300,22 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int inline_size = inline_xattr_size(inode); - struct page *xpage; + struct folio *xfolio; void *xattr_addr; /* The inode already has an extended attribute block. */ - xpage = f2fs_get_node_page(sbi, xnid); - if (IS_ERR(xpage)) - return PTR_ERR(xpage); + xfolio = f2fs_get_xnode_folio(sbi, xnid); + if (IS_ERR(xfolio)) + return PTR_ERR(xfolio); - xattr_addr = page_address(xpage); + xattr_addr = folio_address(xfolio); memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE); - f2fs_put_page(xpage, 1); + f2fs_folio_put(xfolio, true); return 0; } -static int lookup_all_xattrs(struct inode *inode, struct page *ipage, +static int lookup_all_xattrs(struct inode *inode, struct folio *ifolio, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, void **base_addr, int *base_size, @@ -338,7 +339,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - err = read_inline_xattr(inode, ipage, txattr_addr); + err = read_inline_xattr(inode, ifolio, txattr_addr); if (err) goto out; @@ -364,10 +365,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name); if (!*xe) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - err = -EFSCORRUPTED; + err = -ENODATA; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); goto out; @@ -385,7 +386,7 @@ out: return err; } -static int read_all_xattrs(struct inode *inode, struct page *ipage, +static int read_all_xattrs(struct inode *inode, struct folio *ifolio, void **base_addr) { struct f2fs_xattr_header *header; @@ -402,7 +403,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - err = read_inline_xattr(inode, ipage, txattr_addr); + err = read_inline_xattr(inode, ifolio, txattr_addr); if (err) goto fail; } @@ -429,14 +430,14 @@ fail: } static inline int write_all_xattrs(struct inode *inode, __u32 hsize, - void *txattr_addr, struct page *ipage) + void *txattr_addr, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = inline_xattr_size(inode); - struct page *in_page = NULL; + struct folio *in_folio = NULL; void *xattr_addr; void *inline_addr = NULL; - struct page *xpage; + struct folio *xfolio; nid_t new_nid = 0; int err = 0; @@ -446,73 +447,73 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, /* write to inline xattr */ if (inline_size) { - if (ipage) { - inline_addr = inline_xattr_addr(inode, ipage); + if (ifolio) { + inline_addr = inline_xattr_addr(inode, ifolio); } else { - in_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(in_page)) { + in_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(in_folio)) { f2fs_alloc_nid_failed(sbi, new_nid); - return PTR_ERR(in_page); + return PTR_ERR(in_folio); } - inline_addr = inline_xattr_addr(inode, in_page); + inline_addr = inline_xattr_addr(inode, in_folio); } - f2fs_wait_on_page_writeback(ipage ? ipage : in_page, + f2fs_folio_wait_writeback(ifolio ? ifolio : in_folio, NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { err = f2fs_truncate_xattr_node(inode); f2fs_alloc_nid_failed(sbi, new_nid); if (err) { - f2fs_put_page(in_page, 1); + f2fs_folio_put(in_folio, true); return err; } memcpy(inline_addr, txattr_addr, inline_size); - set_page_dirty(ipage ? ipage : in_page); + folio_mark_dirty(ifolio ? ifolio : in_folio); goto in_page_out; } } /* write to xattr node block */ if (F2FS_I(inode)->i_xattr_nid) { - xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + xfolio = f2fs_get_xnode_folio(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xfolio)) { + err = PTR_ERR(xfolio); f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true, true); + f2fs_folio_wait_writeback(xfolio, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + xfolio = f2fs_new_node_folio(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xfolio)) { + err = PTR_ERR(xfolio); f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } f2fs_alloc_nid_done(sbi, new_nid); } - xattr_addr = page_address(xpage); + xattr_addr = folio_address(xfolio); if (inline_size) memcpy(inline_addr, txattr_addr, inline_size); memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); if (inline_size) - set_page_dirty(ipage ? ipage : in_page); - set_page_dirty(xpage); + folio_mark_dirty(ifolio ? ifolio : in_folio); + folio_mark_dirty(xfolio); - f2fs_put_page(xpage, 1); + f2fs_folio_put(xfolio, true); in_page_out: - f2fs_put_page(in_page, 1); + f2fs_folio_put(in_folio, true); return err; } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size, struct page *ipage) + void *buffer, size_t buffer_size, struct folio *ifolio) { struct f2fs_xattr_entry *entry = NULL; int error; @@ -528,11 +529,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - if (!ipage) + if (!ifolio) f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); - error = lookup_all_xattrs(inode, ipage, index, len, name, + error = lookup_all_xattrs(inode, ifolio, index, len, name, &entry, &base_addr, &base_size, &is_inline); - if (!ipage) + if (!ifolio) f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -584,13 +585,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if ((void *)(entry) + sizeof(__u32) > last_base_addr || (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - error = -EFSCORRUPTED; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); - goto cleanup; + break; } if (!prefix) @@ -628,8 +628,9 @@ static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage, int flags) + struct folio *ifolio, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -650,8 +651,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (size > MAX_VALUE_LEN(inode)) return -E2BIG; - - error = read_all_xattrs(inode, ipage, &base_addr); +retry: + error = read_all_xattrs(inode, ifolio, &base_addr); if (error) return error; @@ -660,7 +661,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name); if (!here) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + if (!F2FS_I(inode)->i_xattr_nid) { + error = f2fs_recover_xattr_data(inode, NULL); + f2fs_notice(F2FS_I_SB(inode), + "recover xattr in inode (%lu), error(%d)", + inode->i_ino, error); + if (!error) { + kfree(base_addr); + goto retry; + } + } + f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; @@ -748,26 +759,41 @@ static int __f2fs_setxattr(struct inode *inode, int index, memcpy(pval, value, size); last->e_value_size = cpu_to_le16(size); new_hsize += newsize; + /* + * Explicitly add the null terminator. The unused xattr space + * is supposed to always be zeroed, which would make this + * unnecessary, but don't depend on that. + */ + *(u32 *)((u8 *)last + newsize) = 0; } - error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + error = write_all_xattrs(inode, new_hsize, base_addr, ifolio); if (error) goto exit; if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - f2fs_mark_inode_dirty_sync(inode, true); - if (!error && S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; - inode_set_ctime_current(inode); clear_inode_flag(inode, FI_ACL_MODE); } + inode_set_ctime_current(inode); + f2fs_mark_inode_dirty_sync(inode, true); exit: kfree(base_addr); return error; @@ -775,7 +801,7 @@ exit: int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage, int flags) + struct folio *ifolio, int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; @@ -790,14 +816,14 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, return err; /* this case is only from f2fs_init_inode_metadata */ - if (ipage) + if (ifolio) return __f2fs_setxattr(inode, index, name, value, - size, ipage, flags); + size, ifolio, flags); f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); - err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); + err = __f2fs_setxattr(inode, index, name, value, size, NULL, flags); f2fs_up_write(&F2FS_I(inode)->i_xattr_sem); f2fs_unlock_op(sbi); @@ -805,25 +831,14 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, return err; } -int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +int __init f2fs_init_xattr_cache(void) { - dev_t dev = sbi->sb->s_bdev->bd_dev; - char slab_name[32]; - - sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); - - sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * - sizeof(__le32) + XATTR_PADDING_SIZE; - - sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, - sbi->inline_xattr_slab_size); - if (!sbi->inline_xattr_slab) - return -ENOMEM; - - return 0; + inline_xattr_slab = f2fs_kmem_cache_create("f2fs_xattr_entry", + DEFAULT_XATTR_SLAB_SIZE); + return inline_xattr_slab ? 0 : -ENOMEM; } -void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +void f2fs_destroy_xattr_cache(void) { - kmem_cache_destroy(sbi->inline_xattr_slab); -} + kmem_cache_destroy(inline_xattr_slab); +}
\ No newline at end of file diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index b1811c392e6f..bce3d93e4755 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -89,6 +89,8 @@ struct f2fs_xattr_entry { F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ DEF_INLINE_RESERVED_SIZE - \ MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) +#define DEFAULT_XATTR_SLAB_SIZE (DEFAULT_INLINE_XATTR_ADDRS * \ + sizeof(__le32) + XATTR_PADDING_SIZE) /* * On-disk structure of f2fs_xattr @@ -125,41 +127,41 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; extern const struct xattr_handler f2fs_xattr_advise_handler; extern const struct xattr_handler f2fs_xattr_security_handler; -extern const struct xattr_handler *f2fs_xattr_handlers[]; +extern const struct xattr_handler * const f2fs_xattr_handlers[]; -extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, - size_t, struct page *); -extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); -extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); -extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); +int f2fs_setxattr(struct inode *, int, const char *, const void *, + size_t, struct folio *, int); +int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct folio *); +ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +int __init f2fs_init_xattr_cache(void); +void f2fs_destroy_xattr_cache(void); #else #define f2fs_xattr_handlers NULL #define f2fs_listxattr NULL static inline int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *page, int flags) + struct folio *folio, int flags) { return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, - size_t buffer_size, struct page *dpage) + size_t buffer_size, struct folio *dfolio) { return -EOPNOTSUPP; } -static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } -static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } +static inline int __init f2fs_init_xattr_cache(void) { return 0; } +static inline void f2fs_destroy_xattr_cache(void) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY -extern int f2fs_init_security(struct inode *, struct inode *, - const struct qstr *, struct page *); +int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct folio *); #else static inline int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) + const struct qstr *qstr, struct folio *ifolio) { return 0; } |
