From 5f4ce6abc2dadac67bfb8a14cfb1b9ac3941810f Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 17 Jul 2017 19:16:11 +0800 Subject: f2fs: remove unused input parameter This patch remove unused input parameter in function new_node_page. Signed-off-by: Yunlei He Signed-off-by: Yong Sheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/node.c | 7 +++---- fs/f2fs/xattr.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 94a88b233e98..7a17b21411d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2285,8 +2285,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page); int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); int remove_inode_page(struct inode *inode); struct page *new_inode_page(struct inode *inode); -struct page *new_node_page(struct dnode_of_data *dn, - unsigned int ofs, struct page *ipage); +struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs); void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *get_node_page_ra(struct page *parent, int start); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d53fe620939e..adc3a70d63dc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -613,7 +613,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i], NULL); + npage[i] = new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); @@ -1022,11 +1022,10 @@ struct page *new_inode_page(struct inode *inode) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0, NULL); + return new_node_page(&dn, 0); } -struct page *new_node_page(struct dnode_of_data *dn, - unsigned int ofs, struct page *ipage) +struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info new_ni; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 832c5110abab..ef3a3721ea6b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -442,7 +442,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); -- cgit From 76a9dd85d43b7b3d94e116437dc882e159887a6a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 16 Jul 2017 15:08:54 +0800 Subject: f2fs: spread struct f2fs_dentry_ptr for inline path Use f2fs_dentry_ptr structure to indicate inline dentry structure as much as possible, so we can wrap inline dentry with size-fixed fields to the one with size-changeable fields. With this change, we can handle size-changeable inline dentry more easily. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 ++++- fs/f2fs/inline.c | 47 ++++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7a17b21411d7..70777a889e12 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -361,10 +361,11 @@ struct f2fs_flush_device { /* for directory operations */ struct f2fs_dentry_ptr { struct inode *inode; - const void *bitmap; + void *bitmap; struct f2fs_dir_entry *dentry; __u8 (*filename)[F2FS_SLOT_LEN]; int max; + int nr_bitmap; }; static inline void make_dentry_ptr_block(struct inode *inode, @@ -372,6 +373,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, { d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; + d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; d->bitmap = &t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; @@ -382,6 +384,7 @@ static inline void make_dentry_ptr_inline(struct inode *inode, { d->inode = inode; d->max = NR_INLINE_DENTRY; + d->nr_bitmap = INLINE_DENTRY_BITMAP_SIZE; d->bitmap = &t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e0fd4376e6fb..2082816a504a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -342,6 +342,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct page *page; struct dnode_of_data dn; struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_ptr src, dst; int err; page = f2fs_grab_cache_page(dir->i_mapping, 0, false); @@ -360,21 +361,20 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, dentry_blk = kmap_atomic(page); + make_dentry_ptr_inline(NULL, &src, inline_dentry); + make_dentry_ptr_block(NULL, &dst, dentry_blk); + /* copy data from inline dentry block to new dentry block */ - memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, - INLINE_DENTRY_BITMAP_SIZE); - memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0, - SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE); + memcpy(dst.bitmap, src.bitmap, src.nr_bitmap); + memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap); /* * we do not need to zero out remainder part of dentry and filename * field, since we have used bitmap for marking the usage status of * them, besides, we can also ignore copying/zeroing reserved space * of dentry block, because them haven't been used so far. */ - memcpy(dentry_blk->dentry, inline_dentry->dentry, - sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); - memcpy(dentry_blk->filename, inline_dentry->filename, - NR_INLINE_DENTRY * F2FS_SLOT_LEN); + memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); + memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); if (!PageUptodate(page)) @@ -511,9 +511,10 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, return PTR_ERR(ipage); inline_dentry = inline_data_addr(ipage); - bit_pos = room_for_filename(&inline_dentry->dentry_bitmap, - slots, NR_INLINE_DENTRY); - if (bit_pos >= NR_INLINE_DENTRY) { + make_dentry_ptr_inline(NULL, &d, inline_dentry); + + bit_pos = room_for_filename(d.bitmap, slots, d.max); + if (bit_pos >= d.max) { err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; @@ -534,7 +535,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true); name_hash = f2fs_dentry_hash(new_name, NULL); - make_dentry_ptr_inline(NULL, &d, inline_dentry); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -558,6 +558,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode) { struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); unsigned int bit_pos; int i; @@ -566,10 +567,11 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_wait_on_page_writeback(page, NODE, true); inline_dentry = inline_data_addr(page); - bit_pos = dentry - inline_dentry->dentry; + make_dentry_ptr_inline(NULL, &d, inline_dentry); + + bit_pos = dentry - d.dentry; for (i = 0; i < slots; i++) - __clear_bit_le(bit_pos + i, - &inline_dentry->dentry_bitmap); + __clear_bit_le(bit_pos + i, d.bitmap); set_page_dirty(page); f2fs_put_page(page, 1); @@ -587,19 +589,20 @@ bool f2fs_empty_inline_dir(struct inode *dir) struct page *ipage; unsigned int bit_pos = 2; struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dentry_ptr d; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; inline_dentry = inline_data_addr(ipage); - bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, - NR_INLINE_DENTRY, - bit_pos); + make_dentry_ptr_inline(NULL, &d, inline_dentry); + + bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos); f2fs_put_page(ipage, 1); - if (bit_pos < NR_INLINE_DENTRY) + if (bit_pos < d.max) return false; return true; @@ -614,7 +617,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct f2fs_dentry_ptr d; int err; - if (ctx->pos == NR_INLINE_DENTRY) + make_dentry_ptr_inline(inode, &d, inline_dentry); + + if (ctx->pos == d.max) return 0; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); @@ -627,7 +632,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, err = f2fs_fill_dentries(ctx, &d, 0, fstr); if (!err) - ctx->pos = NR_INLINE_DENTRY; + ctx->pos = d.max; f2fs_put_page(ipage, 1); return err < 0 ? err : 0; -- cgit From 8790568255f3f0b29ec417089c47008a3d493490 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 18 Jul 2017 09:48:12 +0800 Subject: f2fs: alloc new nids for xattr block in recovery recovery file A: recovery file B: -get_dnode_of_data -alloc_nid -recover_xattr_data -set_node_addr(sbi, &ni, NEW_ADDR, false); --->bug_on for nid has been used by file A In recovery process, new allocated node blocks may "reuse" xattr block nids, this patch alloc new nids for xattr blocks in recovery process to avoid this problem. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index adc3a70d63dc..5396611df0c3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "xattr.h" #include "trace.h" #include @@ -2190,7 +2191,8 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; - nid_t new_xnid = nid_of_node(page); + nid_t new_xnid; + struct dnode_of_data dn; struct node_info ni; struct page *xpage; @@ -2206,22 +2208,22 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) recover_xnid: /* 2: update xattr nid in inode */ - remove_free_nid(sbi, new_xnid); - f2fs_i_xnid_write(inode, new_xnid); - if (unlikely(inc_valid_node_count(sbi, inode, false))) - f2fs_bug_on(sbi, 1); + if (!alloc_nid(sbi, &new_xnid)) + return -ENOSPC; + + set_new_dnode(&dn, inode, NULL, NULL, new_xnid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_xnid); + return PTR_ERR(xpage); + } + + alloc_nid_done(sbi, new_xnid); update_inode_page(inode); /* 3: update and set xattr node page dirty */ - xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid); - if (!xpage) - return -ENOMEM; - - memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE); + memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); - get_node_info(sbi, new_xnid, &ni); - ni.ino = inode->i_ino; - set_node_addr(sbi, &ni, NEW_ADDR, false); set_page_dirty(xpage); f2fs_put_page(xpage, 1); -- cgit From 14af20fcb1833dd776822361891963c90f7b0262 Mon Sep 17 00:00:00 2001 From: "Ernesto A. Fernández" Date: Sun, 23 Jul 2017 22:32:54 -0300 Subject: f2fs: preserve i_mode if __f2fs_set_acl() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When changing a file's acl mask, __f2fs_set_acl() will first set the group bits of i_mode to the value of the mask, and only then set the actual extended attribute representing the new acl. If the second part fails (due to lack of space, for example) and the file had no acl attribute to begin with, the system will from now on assume that the mask permission bits are actual group permission bits, potentially granting access to the wrong users. Prevent this by only changing the inode mode after the acl has been set. Signed-off-by: Ernesto A. Fernández Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index b4b8438c42ef..436b3a1464d9 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -207,15 +207,16 @@ static int __f2fs_set_acl(struct inode *inode, int type, void *value = NULL; size_t size = 0; int error; + umode_t mode = inode->i_mode; switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - set_acl_inode(inode, inode->i_mode); + set_acl_inode(inode, mode); } break; -- cgit From 640cc18982b1c2049ac3a7223444248aea7d51b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Jul 2017 10:59:55 -0700 Subject: f2fs: give a try to do atomic write in -ENOMEM case It'd be better to retry writing atomic pages when we get -ENOMEM. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f964b68718c1..f5d139f897dc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,17 +309,21 @@ static int __commit_inmem_pages(struct inode *inode, inode_dec_dirty_pages(inode); remove_dirty_inode(inode); } - +retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } unlock_page(page); break; } - /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; last_idx = page->index; -- cgit From 7a10f0177e117e9935ee9e5c595fcf3c57215de5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Jul 2017 19:46:29 -0700 Subject: f2fs: don't give partially written atomic data from process crash This patch resolves the below scenario. == Process 1 == == Process 2 == open(w) open(rw) begin write(new_#1) process_crash f_op->flush locks_remove_posix f_op>release read (new_#1) In order to avoid corrupted database caused by new_#1, we must do roll-back at process_crash time. In order to check that, this patch keeps task which triggers transaction begin, and does roll-back in f_op->flush before removing file locks. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 70777a889e12..859ff4265c34 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -536,6 +536,7 @@ struct f2fs_inode_info { struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ + struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2706130c261b..75b3fda99e55 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1495,6 +1495,22 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) return 0; } +static int f2fs_file_flush(struct file *file, fl_owner_t id) +{ + struct inode *inode = file_inode(file); + + /* + * If the process doing a transaction is crashed, we should do + * roll-back. Otherwise, other reader/write can see corrupted database + * until all the writers close its file. Since this should be done + * before dropping file lock, it needs to do in ->flush. + */ + if (f2fs_is_atomic_file(inode) && + F2FS_I(inode)->inmem_task == current) + drop_inmem_pages(inode); + return 0; +} + #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) @@ -1614,6 +1630,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) } inc_stat: + F2FS_I(inode)->inmem_task = current; stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: @@ -2506,6 +2523,7 @@ const struct file_operations f2fs_file_operations = { .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, + .flush = f2fs_file_flush, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, .unlocked_ioctl = f2fs_ioctl, -- cgit From dc6febb6bcec7ff1b4a4d306411013b5f648f27e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 22 Jul 2017 08:52:23 +0800 Subject: f2fs: make background threads of f2fs being aware of freezing When ->freeze_fs is called from lvm for doing snapshot, it needs to make sure there will be no more changes in filesystem's data, however, previously, background threads like GC thread wasn't aware of freezing, so in environment with active background threads, data of snapshot becomes unstable. This patch fixes this issue by adding sb_{start,end}_intwrite in below background threads: - GC thread - flush thread - discard thread Note that, don't use sb_start_intwrite() in gc_thread_func() due to: generic/241 reports below bug: ====================================================== WARNING: possible circular locking dependency detected 4.13.0-rc1+ #32 Tainted: G O ------------------------------------------------------ f2fs_gc-250:0/22186 is trying to acquire lock: (&sbi->gc_mutex){+.+...}, at: [] f2fs_sync_fs+0x7b/0x1b0 [f2fs] but task is already holding lock: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (sb_internal#2){++++.-}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __sb_start_write+0x11d/0x1f0 f2fs_evict_inode+0x2d6/0x4e0 [f2fs] evict+0xa8/0x170 iput+0x1fb/0x2c0 f2fs_sync_inode_meta+0x3f/0xf0 [f2fs] write_checkpoint+0x1b1/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] f2fs_do_sync_file.isra.24+0x137/0xa30 [f2fs] f2fs_sync_file+0x34/0x40 [f2fs] vfs_fsync_range+0x4a/0xa0 do_fsync+0x3c/0x60 SyS_fdatasync+0x15/0x20 do_fast_syscall_32+0xa1/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #1 (&sbi->cp_mutex){+.+...}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 write_checkpoint+0x2f/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] sync_filesystem+0x67/0x80 generic_shutdown_super+0x27/0x100 kill_block_super+0x22/0x50 kill_f2fs_super+0x3a/0x40 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x69/0x80 exit_to_usermode_loop+0x57/0x92 do_fast_syscall_32+0x18c/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #0 (&sbi->gc_mutex){+.+...}: validate_chain.isra.36+0xc50/0xdb0 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] kthread+0xe9/0x120 ret_from_fork+0x19/0x24 other info that might help us debug this: Chain exists of: &sbi->gc_mutex --> &sbi->cp_mutex --> sb_internal#2 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sb_internal#2); lock(&sbi->cp_mutex); lock(sb_internal#2); lock(&sbi->gc_mutex); *** DEADLOCK *** 1 lock held by f2fs_gc-250:0/22186: #0: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] stack backtrace: CPU: 2 PID: 22186 Comm: f2fs_gc-250:0 Tainted: G O 4.13.0-rc1+ #32 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack+0x5f/0x92 print_circular_bug+0x1b3/0x1bd validate_chain.isra.36+0xc50/0xdb0 ? __this_cpu_preempt_check+0xf/0x20 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] __mutex_lock+0x4f/0x830 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] mutex_lock_nested+0x25/0x30 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] ? preempt_schedule_common+0x2f/0x4d ? f2fs_gc+0x540/0x540 [f2fs] kthread+0xe9/0x120 ? f2fs_gc+0x540/0x540 [f2fs] ? kthread_create_on_node+0x30/0x30 ret_from_fork+0x19/0x24 The deadlock occurs in below condition: GC Thread Thread B - sb_start_intwrite - f2fs_sync_file - f2fs_sync_fs - mutex_lock(&sbi->gc_mutex) - write_checkpoint - block_operations - f2fs_sync_inode_meta - iput - sb_start_intwrite - mutex_lock(&sbi->gc_mutex) Fix this by altering sb_start_intwrite to sb_start_write_trylock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 9 +++++++-- fs/f2fs/segment.c | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index fa3d2e2df8e7..41c649c9c55c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -55,6 +55,9 @@ static int gc_thread_func(void *data) } #endif + if (!sb_start_write_trylock(sbi->sb)) + continue; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -69,12 +72,12 @@ static int gc_thread_func(void *data) * So, I'd like to wait some time to collect dirty segments. */ if (!mutex_trylock(&sbi->gc_mutex)) - continue; + goto next; if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); - continue; + goto next; } if (has_enough_invalid_blocks(sbi)) @@ -93,6 +96,8 @@ static int gc_thread_func(void *data) /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi); +next: + sb_end_write(sbi->sb); } while (!kthread_should_stop()); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5d139f897dc..2a5672a20a62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -485,6 +485,8 @@ repeat: if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + if (!llist_empty(&fcc->issue_list)) { struct flush_cmd *cmd, *next; int ret; @@ -503,6 +505,8 @@ repeat: fcc->dispatch_list = NULL; } + sb_end_intwrite(sbi->sb); + wait_event_interruptible(*q, kthread_should_stop() || !llist_empty(&fcc->issue_list)); goto repeat; @@ -1130,9 +1134,13 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + __issue_discard_cmd(sbi, true); __wait_discard_cmd(sbi, true); + sb_end_intwrite(sbi->sb); + congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; -- cgit From e65ef20781cbfcbfe2d62ce37e028964bc34b313 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Jul 2017 12:58:59 -0700 Subject: f2fs: add ioctl to expose current features This patch adds an ioctl to provide feature information to user. For exapmle, SQLite can use this ioctl to detect whether f2fs support atomic write or not. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 859ff4265c34..f2937986d73c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -112,6 +112,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_ENCRYPT 0x0001 #define F2FS_FEATURE_BLKZONED 0x0002 +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -308,6 +309,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, struct f2fs_flush_device) #define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ struct f2fs_gc_range) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 75b3fda99e55..0a5faf21dd4d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2401,6 +2401,16 @@ out: return ret; } +static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); + + /* Must validate to set it with SQLite behavior in Android. */ + sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; + + return put_user(sb_feature, (u32 __user *)arg); +} long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2443,6 +2453,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_move_range(filp, arg); case F2FS_IOC_FLUSH_DEVICE: return f2fs_ioc_flush_device(filp, arg); + case F2FS_IOC_GET_FEATURES: + return f2fs_ioc_get_features(filp, arg); default: return -ENOTTY; } @@ -2508,6 +2520,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_DEFRAGMENT: case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: + case F2FS_IOC_GET_FEATURES: break; default: return -ENOIOCTLCMD; -- cgit From f247037120ecd3dcbbc196b51ded8b57edf4904f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2017 00:19:05 +0800 Subject: f2fs: make max inline size changeable This patch tries to make below macros calculating max inline size, inline dentry field size considerring reserving size-changeable space: - MAX_INLINE_DATA - NR_INLINE_DENTRY - INLINE_DENTRY_BITMAP_SIZE - INLINE_RESERVED_SIZE Then, when inline_{data,dentry} options is enabled, it allows us to reserve inline space with different size flexibly for adding newly introduced inode attribute. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +-- fs/f2fs/f2fs.h | 48 +++++++++++++++++++++++----- fs/f2fs/inline.c | 95 ++++++++++++++++++++++++++++---------------------------- fs/f2fs/inode.c | 4 +-- fs/f2fs/super.c | 3 ++ 5 files changed, 95 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 87c1f4150c64..ca978c32ae00 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -813,7 +813,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO); } - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) { + if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -1857,7 +1857,7 @@ restart: set_new_dnode(&dn, inode, ipage, ipage, 0); if (f2fs_has_inline_data(inode)) { - if (pos + len <= MAX_INLINE_DATA) { + if (pos + len <= MAX_INLINE_DATA(inode)) { read_inline_data(page, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f2937986d73c..b8e63739d1c1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -357,6 +357,25 @@ struct f2fs_flush_device { u32 segments; /* # of segments to flush */ }; +/* for inline stuff */ +#define DEF_INLINE_RESERVED_SIZE 1 + +static inline int get_inline_reserved_size(struct inode *inode); +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * (DEF_ADDRS_PER_INODE -\ + get_inline_reserved_size(inode) -\ + F2FS_INLINE_XATTR_ADDRS)) + +/* for inline dir */ +#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY(inode) + \ + INLINE_DENTRY_BITMAP_SIZE(inode))) + /* * For INODE and NODE manager */ @@ -382,14 +401,19 @@ static inline void make_dentry_ptr_block(struct inode *inode, } static inline void make_dentry_ptr_inline(struct inode *inode, - struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t) + struct f2fs_dentry_ptr *d, void *t) { + int entry_cnt = NR_INLINE_DENTRY(inode); + int bitmap_size = INLINE_DENTRY_BITMAP_SIZE(inode); + int reserved_size = INLINE_RESERVED_SIZE(inode); + d->inode = inode; - d->max = NR_INLINE_DENTRY; - d->nr_bitmap = INLINE_DENTRY_BITMAP_SIZE; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; + d->max = entry_cnt; + d->nr_bitmap = bitmap_size; + d->bitmap = t; + d->dentry = t + bitmap_size + reserved_size; + d->filename = t + bitmap_size + reserved_size + + SIZE_OF_DIR_ENTRY * entry_cnt; } /* @@ -543,6 +567,8 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ struct rw_semaphore i_mmap_sem; + + int i_inline_reserved; /* reserved size in inline data */ }; static inline void get_extent_info(struct extent_info *ext, @@ -2075,11 +2101,12 @@ static inline bool f2fs_is_drop_cache(struct inode *inode) return is_inode_flag_set(inode, FI_DROP_CACHE); } -static inline void *inline_data_addr(struct page *page) +static inline void *inline_data_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); + int reserved_size = get_inline_reserved_size(inode); - return (void *)&(ri->i_addr[1]); + return (void *)&(ri->i_addr[reserved_size]); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -2170,6 +2197,11 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } +static inline int get_inline_reserved_size(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_reserved; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2082816a504a..f38be791fdf9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -22,7 +22,7 @@ bool f2fs_may_inline_data(struct inode *inode) if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; - if (i_size_read(inode) > MAX_INLINE_DATA) + if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) @@ -44,6 +44,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) void read_inline_data(struct page *page, struct page *ipage) { + struct inode *inode = page->mapping->host; void *src_addr, *dst_addr; if (PageUptodate(page)) @@ -51,12 +52,12 @@ void read_inline_data(struct page *page, struct page *ipage) f2fs_bug_on(F2FS_P_SB(page), page->index); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); + src_addr = inline_data_addr(inode, ipage); dst_addr = kmap_atomic(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); flush_dcache_page(page); kunmap_atomic(dst_addr); if (!PageUptodate(page)) @@ -67,13 +68,13 @@ void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) { void *addr; - if (from >= MAX_INLINE_DATA) + if (from >= MAX_INLINE_DATA(inode)) return; - addr = inline_data_addr(ipage); + addr = inline_data_addr(inode, ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); - memset(addr + from, 0, MAX_INLINE_DATA - from); + memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); if (from == 0) @@ -216,8 +217,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); src_addr = kmap_atomic(page); - dst_addr = inline_data_addr(dn.inode_page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + dst_addr = inline_data_addr(inode, dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); @@ -255,9 +256,9 @@ process_inline: f2fs_wait_on_page_writeback(ipage, NODE, true); - src_addr = inline_data_addr(npage); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + src_addr = inline_data_addr(inode, npage); + dst_addr = inline_data_addr(inode, ipage); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); @@ -285,11 +286,11 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, struct fscrypt_name *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct f2fs_inline_dentry *inline_dentry; struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; + void *inline_dentry; f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); @@ -300,9 +301,9 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, namehash = f2fs_dentry_hash(&name, fname); - inline_dentry = inline_data_addr(ipage); + inline_dentry = inline_data_addr(dir, ipage); - make_dentry_ptr_inline(NULL, &d, inline_dentry); + make_dentry_ptr_inline(dir, &d, inline_dentry); de = find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) @@ -316,19 +317,19 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { - struct f2fs_inline_dentry *inline_dentry; struct f2fs_dentry_ptr d; + void *inline_dentry; - inline_dentry = inline_data_addr(ipage); + inline_dentry = inline_data_addr(inode, ipage); - make_dentry_ptr_inline(NULL, &d, inline_dentry); + make_dentry_ptr_inline(inode, &d, inline_dentry); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); /* update i_size to MAX_INLINE_DATA */ - if (i_size_read(inode) < MAX_INLINE_DATA) - f2fs_i_size_write(inode, MAX_INLINE_DATA); + if (i_size_read(inode) < MAX_INLINE_DATA(inode)) + f2fs_i_size_write(inode, MAX_INLINE_DATA(inode)); return 0; } @@ -337,7 +338,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, * release ipage in this function. */ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { struct page *page; struct dnode_of_data dn; @@ -357,12 +358,12 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; f2fs_wait_on_page_writeback(page, DATA, true); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); dentry_blk = kmap_atomic(page); - make_dentry_ptr_inline(NULL, &src, inline_dentry); - make_dentry_ptr_block(NULL, &dst, dentry_blk); + make_dentry_ptr_inline(dir, &src, inline_dentry); + make_dentry_ptr_block(dir, &dst, dentry_blk); /* copy data from inline dentry block to new dentry block */ memcpy(dst.bitmap, src.bitmap, src.nr_bitmap); @@ -395,14 +396,13 @@ out: return err; } -static int f2fs_add_inline_entries(struct inode *dir, - struct f2fs_inline_dentry *inline_dentry) +static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) { struct f2fs_dentry_ptr d; unsigned long bit_pos = 0; int err = 0; - make_dentry_ptr_inline(NULL, &d, inline_dentry); + make_dentry_ptr_inline(dir, &d, inline_dentry); while (bit_pos < d.max) { struct f2fs_dir_entry *de; @@ -444,19 +444,19 @@ punch_dentry_pages: } static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { - struct f2fs_inline_dentry *backup_dentry; + void *backup_dentry; int err; backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir), - sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO); + MAX_INLINE_DATA(dir), GFP_F2FS_ZERO); if (!backup_dentry) { f2fs_put_page(ipage, 1); return -ENOMEM; } - memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); + memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); @@ -473,9 +473,9 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, return 0; recover: lock_page(ipage); - memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); + memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); - f2fs_i_size_write(dir, MAX_INLINE_DATA); + f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); set_page_dirty(ipage); f2fs_put_page(ipage, 1); @@ -484,7 +484,7 @@ recover: } static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, - struct f2fs_inline_dentry *inline_dentry) + void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) return f2fs_move_inline_dirents(dir, ipage, inline_dentry); @@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_inline_dentry *inline_dentry = NULL; + void *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; @@ -510,8 +510,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (IS_ERR(ipage)) return PTR_ERR(ipage); - inline_dentry = inline_data_addr(ipage); - make_dentry_ptr_inline(NULL, &d, inline_dentry); + inline_dentry = inline_data_addr(dir, ipage); + make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { @@ -557,8 +557,8 @@ out: void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode) { - struct f2fs_inline_dentry *inline_dentry; struct f2fs_dentry_ptr d; + void *inline_dentry; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); unsigned int bit_pos; int i; @@ -566,8 +566,8 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, lock_page(page); f2fs_wait_on_page_writeback(page, NODE, true); - inline_dentry = inline_data_addr(page); - make_dentry_ptr_inline(NULL, &d, inline_dentry); + inline_dentry = inline_data_addr(dir, page); + make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = dentry - d.dentry; for (i = 0; i < slots; i++) @@ -588,15 +588,15 @@ bool f2fs_empty_inline_dir(struct inode *dir) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos = 2; - struct f2fs_inline_dentry *inline_dentry; + void *inline_dentry; struct f2fs_dentry_ptr d; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; - inline_dentry = inline_data_addr(ipage); - make_dentry_ptr_inline(NULL, &d, inline_dentry); + inline_dentry = inline_data_addr(dir, ipage); + make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos); @@ -612,9 +612,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr) { struct inode *inode = file_inode(file); - struct f2fs_inline_dentry *inline_dentry = NULL; struct page *ipage = NULL; struct f2fs_dentry_ptr d; + void *inline_dentry = NULL; int err; make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -626,7 +626,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (IS_ERR(ipage)) return PTR_ERR(ipage); - inline_dentry = inline_data_addr(ipage); + inline_dentry = inline_data_addr(inode, ipage); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -657,7 +657,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; } - ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode)); + ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); if (start >= ilen) goto out; if (start + len < ilen) @@ -666,7 +666,8 @@ int f2fs_inline_data_fiemap(struct inode *inode, get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage); + byteaddr += (char *)inline_data_addr(inode, ipage) - + (char *)F2FS_INODE(ipage); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); out: f2fs_put_page(ipage, 1); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 6cd312a17c69..32ec6b23fe01 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -87,9 +87,9 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) static void __recover_inline_status(struct inode *inode, struct page *ipage) { - void *inline_data = inline_data_addr(ipage); + void *inline_data = inline_data_addr(inode, ipage); __le32 *start = inline_data; - __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); + __le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32); while (start < end) { if (*start++) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 32e4c025e97e..2d236384f938 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -446,6 +446,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) #endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; + + fi->i_inline_reserved = DEF_INLINE_RESERVED_SIZE; + return &fi->vfs_inode; } -- cgit From 7a2af766af15887754f7f7a0869b4603b390876a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2017 00:19:06 +0800 Subject: f2fs: enhance on-disk inode structure scalability This patch add new flag F2FS_EXTRA_ATTR storing in inode.i_inline to indicate that on-disk structure of current inode is extended. In order to extend, we changed the inode structure a bit: Original one: struct f2fs_inode { ... struct f2fs_extent i_ext; __le32 i_addr[DEF_ADDRS_PER_INODE]; __le32 i_nid[DEF_NIDS_PER_INODE]; } Extended one: struct f2fs_inode { ... struct f2fs_extent i_ext; union { struct { __le16 i_extra_isize; __le16 i_padding; __le32 i_extra_end[0]; }; __le32 i_addr[DEF_ADDRS_PER_INODE]; }; __le32 i_nid[DEF_NIDS_PER_INODE]; } Once F2FS_EXTRA_ATTR is set, we will steal four bytes in the head of i_addr field for storing i_extra_isize and i_padding. with i_extra_isize, we can calculate actual size of reserved space in i_addr, available attribute fields included in total extra attribute fields for current inode can be described as below: +--------------------+ | .i_mode | | ... | | .i_ext | +--------------------+ | .i_extra_isize |-----+ | .i_padding | | | .i_prjid | | | .i_atime_extra | | | .i_ctime_extra | | | .i_mtime_extra |<----+ | .i_inode_cs |<----- store blkaddr/inline from here | .i_xattr_cs | | ... | +--------------------+ | | | block address | | | +--------------------+ | .i_nid | +--------------------+ | node_footer | | (nid, ino, offset) | +--------------------+ Hence, with this patch, we would enhance scalability of f2fs inode for storing more newly added attribute. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 ++++++++---- fs/f2fs/f2fs.h | 72 +++++++++++++++++++++++++++++++++++++++++------------- fs/f2fs/file.c | 23 +++++++++++------ fs/f2fs/gc.c | 2 +- fs/f2fs/inode.c | 32 +++++++++++++++--------- fs/f2fs/namei.c | 5 ++++ fs/f2fs/node.c | 7 ++++-- fs/f2fs/recovery.c | 7 +++--- fs/f2fs/super.c | 11 ++++++--- 9 files changed, 124 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ca978c32ae00..aefc2a5745d3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -460,10 +460,14 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn = F2FS_NODE(dn->node_page); __le32 *addr_array; + int base = 0; + + if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) + base = get_extra_isize(dn->inode); /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); + addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* @@ -507,8 +511,8 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) f2fs_wait_on_page_writeback(dn->node_page, NODE, true); for (; count > 0; dn->ofs_in_node++) { - block_t blkaddr = - datablock_addr(dn->node_page, dn->ofs_in_node); + block_t blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -755,7 +759,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); if (dn->data_blkaddr == NEW_ADDR) goto alloc; @@ -902,7 +907,7 @@ next_dnode: end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: - blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { if (create) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b8e63739d1c1..d0682fd4c690 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -110,9 +110,10 @@ struct f2fs_mount_info { unsigned int opt; }; -#define F2FS_FEATURE_ENCRYPT 0x0001 -#define F2FS_FEATURE_BLKZONED 0x0002 -#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#define F2FS_FEATURE_ENCRYPT 0x0001 +#define F2FS_FEATURE_BLKZONED 0x0002 +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -359,10 +360,10 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 - -static inline int get_inline_reserved_size(struct inode *inode); -#define MAX_INLINE_DATA(inode) (sizeof(__le32) * (DEF_ADDRS_PER_INODE -\ - get_inline_reserved_size(inode) -\ +static inline int get_extra_isize(struct inode *inode); +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + DEF_INLINE_RESERVED_SIZE - \ F2FS_INLINE_XATTR_ADDRS)) /* for inline dir */ @@ -568,7 +569,7 @@ struct f2fs_inode_info { struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ struct rw_semaphore i_mmap_sem; - int i_inline_reserved; /* reserved size in inline data */ + int i_extra_isize; /* size of extra space located in i_addr */ }; static inline void get_extent_info(struct extent_info *ext, @@ -1792,20 +1793,38 @@ static inline bool IS_INODE(struct page *page) return RAW_IS_INODE(p); } +static inline int offset_in_addr(struct f2fs_inode *i) +{ + return (i->i_inline & F2FS_EXTRA_ATTR) ? + (le16_to_cpu(i->i_extra_isize) / sizeof(__le32)) : 0; +} + static inline __le32 *blkaddr_in_node(struct f2fs_node *node) { return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; } -static inline block_t datablock_addr(struct page *node_page, - unsigned int offset) +static inline int f2fs_has_extra_attr(struct inode *inode); +static inline block_t datablock_addr(struct inode *inode, + struct page *node_page, unsigned int offset) { struct f2fs_node *raw_node; __le32 *addr_array; + int base = 0; + bool is_inode = IS_INODE(node_page); raw_node = F2FS_NODE(node_page); + + /* from GC path only */ + if (!inode) { + if (is_inode) + base = offset_in_addr(&raw_node->i); + } else if (f2fs_has_extra_attr(inode) && is_inode) { + base = get_extra_isize(inode); + } + addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[offset]); + return le32_to_cpu(addr_array[base + offset]); } static inline int f2fs_test_bit(unsigned int nr, char *addr) @@ -1896,6 +1915,7 @@ enum { FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2015,6 +2035,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_DATA_EXIST, &fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) set_bit(FI_INLINE_DOTS, &fi->flags); + if (ri->i_inline & F2FS_EXTRA_ATTR) + set_bit(FI_EXTRA_ATTR, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -2031,6 +2053,13 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_DATA_EXIST; if (is_inode_flag_set(inode, FI_INLINE_DOTS)) ri->i_inline |= F2FS_INLINE_DOTS; + if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) + ri->i_inline |= F2FS_EXTRA_ATTR; +} + +static inline int f2fs_has_extra_attr(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_EXTRA_ATTR); } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -2041,8 +2070,8 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { if (f2fs_has_inline_xattr(inode)) - return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; - return DEF_ADDRS_PER_INODE; + return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; + return CUR_ADDRS_PER_INODE(inode); } static inline void *inline_xattr_addr(struct page *page) @@ -2104,9 +2133,9 @@ static inline bool f2fs_is_drop_cache(struct inode *inode) static inline void *inline_data_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); - int reserved_size = get_inline_reserved_size(inode); + int extra_size = get_extra_isize(inode); - return (void *)&(ri->i_addr[reserved_size]); + return (void *)&(ri->i_addr[extra_size + DEF_INLINE_RESERVED_SIZE]); } static inline int f2fs_has_inline_dentry(struct inode *inode) @@ -2197,15 +2226,19 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } -static inline int get_inline_reserved_size(struct inode *inode) +static inline int get_extra_isize(struct inode *inode) { - return F2FS_I(inode)->i_inline_reserved; + return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +#define F2FS_TOTAL_EXTRA_ATTR_SIZE \ + (offsetof(struct f2fs_inode, i_extra_end) - \ + offsetof(struct f2fs_inode, i_extra_isize)) \ + /* * file.c */ @@ -2798,6 +2831,11 @@ static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); } +static inline int f2fs_sb_has_extra_attr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0a5faf21dd4d..d2ca4b71cb53 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -382,7 +382,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (__found_offset(blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -467,9 +468,13 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; + int base = 0; + + if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) + base = get_extra_isize(dn->inode); raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + ofs; + addr = blkaddr_in_node(raw_node) + base + ofs; for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); @@ -927,7 +932,8 @@ next_dnode: done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + *blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (!is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { @@ -1003,8 +1009,8 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.node_page, - dn.ofs_in_node); + dn.data_blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { @@ -1173,7 +1179,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR) + if (datablock_addr(dn->inode, dn->node_page, + dn->ofs_in_node) == NULL_ADDR) count++; } @@ -1184,8 +1191,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = - datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); /* * reserve_new_blocks will not guarantee entire block * allocation. diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 41c649c9c55c..f57cadae1a30 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -587,7 +587,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(node_page, ofs_in_node); + source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 32ec6b23fe01..0a6699a23dfb 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -49,20 +49,22 @@ void f2fs_set_inode_flags(struct inode *inode) static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { + int extra_size = get_extra_isize(inode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[0]) - inode->i_rdev = - old_decode_dev(le32_to_cpu(ri->i_addr[0])); + if (ri->i_addr[extra_size]) + inode->i_rdev = old_decode_dev( + le32_to_cpu(ri->i_addr[extra_size])); else - inode->i_rdev = - new_decode_dev(le32_to_cpu(ri->i_addr[1])); + inode->i_rdev = new_decode_dev( + le32_to_cpu(ri->i_addr[extra_size + 1])); } } static bool __written_first_block(struct f2fs_inode *ri) { - block_t addr = le32_to_cpu(ri->i_addr[0]); + block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); if (addr != NEW_ADDR && addr != NULL_ADDR) return true; @@ -71,16 +73,18 @@ static bool __written_first_block(struct f2fs_inode *ri) static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { + int extra_size = get_extra_isize(inode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[0] = + ri->i_addr[extra_size] = cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[1] = 0; + ri->i_addr[extra_size + 1] = 0; } else { - ri->i_addr[0] = 0; - ri->i_addr[1] = + ri->i_addr[extra_size] = 0; + ri->i_addr[extra_size + 1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[2] = 0; + ri->i_addr[extra_size + 2] = 0; } } } @@ -153,6 +157,9 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? + le16_to_cpu(ri->i_extra_isize) : 0; + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -292,6 +299,9 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_generation = cpu_to_le32(inode->i_generation); ri->i_dir_level = F2FS_I(inode)->i_dir_level; + if (f2fs_has_extra_attr(inode)) + ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 760d85223c81..26b4d2b54812 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -72,6 +72,11 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); + if (f2fs_sb_has_extra_attr(sbi->sb)) { + set_inode_flag(inode, FI_EXTRA_ATTR); + F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + } + if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5396611df0c3..547b84fc10f6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -655,7 +655,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); return 0; release_pages: @@ -2263,7 +2264,9 @@ retry: dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; - dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; + dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); + if (dst->i_inline & F2FS_EXTRA_ATTR) + dst->i_extra_isize = src->i_extra_isize; new_ni = old_ni; new_ni.ino = ino; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 907d6b7dde6a..2d9b8182691f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -361,7 +361,8 @@ out: return 0; truncate_out: - if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) + if (datablock_addr(tdn.inode, tdn.node_page, + tdn.ofs_in_node) == blkaddr) truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); @@ -414,8 +415,8 @@ retry_dn: for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.node_page, dn.ofs_in_node); - dest = datablock_addr(page, dn.ofs_in_node); + src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + dest = datablock_addr(dn.inode, page, dn.ofs_in_node); /* skip recovering if dest is the same as src */ if (src == dest) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2d236384f938..b30097190605 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -447,8 +447,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; - fi->i_inline_reserved = DEF_INLINE_RESERVED_SIZE; - return &fi->vfs_inode; } @@ -1306,9 +1304,16 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_blocks(void) { - loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); + loff_t result = 0; loff_t leaf_count = ADDRS_PER_BLOCK; + /* + * note: previously, result is equal to (DEF_ADDRS_PER_INODE - + * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * space in inode.i_addr, it will be more safe to reassign + * result as zero. + */ + /* two direct node blocks */ result += (leaf_count * 2); -- cgit From a6d3a479ae082173c6102eba0e502cc439dacf21 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Jul 2017 17:12:06 +0800 Subject: f2fs: record quota during dot{,dot} recovery In ->lookup(), we will have a try to recover dot or dotdot for corrupted directory, once disk quota is on, if it allocates new block during dotdot recovery, we need to record disk quota info for the allocation, so this patch fixes this issue by adding missing dquot_initialize() in __recover_dot_dentries. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 26b4d2b54812..98e13f123d87 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -266,6 +266,10 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) return 0; } + err = dquot_initialize(dir); + if (err) + return err; + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); -- cgit From 5c57132eaf5265937e46340bfbfb97ffb078c423 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jul 2017 00:01:41 +0800 Subject: f2fs: support project quota This patch adds to support plain project quota. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 29 +++++++++++++++++++++++++++++ fs/f2fs/file.c | 13 ------------- fs/f2fs/inode.c | 24 +++++++++++++++++++++++- fs/f2fs/namei.c | 31 +++++++++++++++++++++++++++++++ fs/f2fs/node.c | 7 ++++++- fs/f2fs/super.c | 17 ++++++++++++++++- 6 files changed, 105 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d0682fd4c690..d486539086b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -91,6 +91,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_LFS 0x00040000 #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 +#define F2FS_MOUNT_PRJQUOTA 0x00200000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -114,6 +115,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_BLKZONED 0x0002 #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 #define F2FS_FEATURE_EXTRA_ATTR 0x0008 +#define F2FS_FEATURE_PRJQUOTA 0x0010 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -570,6 +572,7 @@ struct f2fs_inode_info { struct rw_semaphore i_mmap_sem; int i_extra_isize; /* size of extra space located in i_addr */ + kprojid_t i_projid; /* id for project quota */ }; static inline void get_extent_info(struct extent_info *ext, @@ -1887,6 +1890,20 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) *addr ^= mask; } +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) +#define F2FS_FL_INHERITED (FS_PROJINHERIT_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1916,6 +1933,7 @@ enum { FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ FI_HOT_DATA, /* indicate file is hot */ FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2239,6 +2257,12 @@ static inline int get_extra_isize(struct inode *inode) (offsetof(struct f2fs_inode, i_extra_end) - \ offsetof(struct f2fs_inode, i_extra_isize)) \ +#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr)) +#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \ + ((offsetof(typeof(*f2fs_inode), field) + \ + sizeof((f2fs_inode)->field)) \ + <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ + /* * file.c */ @@ -2836,6 +2860,11 @@ static inline int f2fs_sb_has_extra_attr(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); } +static inline int f2fs_sb_has_project_quota(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d2ca4b71cb53..a0413c951458 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1518,19 +1518,6 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) return 0; } -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) - -static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) -{ - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & F2FS_REG_FLMASK; - else - return flags & F2FS_OTHER_FLMASK; -} - static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0a6699a23dfb..f15e663a1a15 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -114,6 +114,7 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + projid_t i_projid; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -173,6 +174,16 @@ static int do_read_inode(struct inode *inode) if (!need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; + if (fi->i_flags & FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) + i_projid = (projid_t)le32_to_cpu(ri->i_projid); + else + i_projid = F2FS_DEF_PROJID; + fi->i_projid = make_kprojid(&init_user_ns, i_projid); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -299,9 +310,20 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_generation = cpu_to_le32(inode->i_generation); ri->i_dir_level = F2FS_I(inode)->i_dir_level; - if (f2fs_has_extra_attr(inode)) + if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_projid)) { + projid_t i_projid; + + i_projid = from_kprojid(&init_user_ns, + F2FS_I(inode)->i_projid); + ri->i_projid = cpu_to_le32(i_projid); + } + } + __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98e13f123d87..e241c1b2c636 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -58,6 +58,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto fail; } + if (f2fs_sb_has_project_quota(sbi->sb) && + (F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL)) + F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + else + F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + F2FS_DEF_PROJID); + err = dquot_initialize(inode); if (err) goto fail_drop; @@ -90,6 +97,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); + F2FS_I(inode)->i_flags = + f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); + + if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + trace_f2fs_new_inode(inode, 0); return inode; @@ -209,6 +222,11 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !fscrypt_has_permitted_context(dir, inode)) return -EPERM; + if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && + (!projid_eq(F2FS_I(dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid))) + return -EXDEV; + err = dquot_initialize(dir); if (err) return err; @@ -733,6 +751,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + (!projid_eq(F2FS_I(new_dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid))) + return -EXDEV; + err = dquot_initialize(old_dir); if (err) goto out; @@ -921,6 +944,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, !fscrypt_has_permitted_context(old_dir, new_inode))) return -EPERM; + if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + !projid_eq(F2FS_I(new_dir)->i_projid, + F2FS_I(old_dentry->d_inode)->i_projid)) || + (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + !projid_eq(F2FS_I(old_dir)->i_projid, + F2FS_I(new_dentry->d_inode)->i_projid))) + return -EXDEV; + err = dquot_initialize(old_dir); if (err) goto out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 547b84fc10f6..11e2d49203b3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2265,8 +2265,13 @@ retry: dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); - if (dst->i_inline & F2FS_EXTRA_ATTR) + if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; + if (f2fs_sb_has_project_quota(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_projid)) + dst->i_projid = src->i_projid; + } new_ni = old_ni; new_ni.ino = ino; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b30097190605..1241739f4d54 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -109,6 +109,7 @@ enum { Opt_nolazytime, Opt_usrquota, Opt_grpquota, + Opt_prjquota, Opt_err, }; @@ -146,6 +147,7 @@ static match_table_t f2fs_tokens = { {Opt_nolazytime, "nolazytime"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_prjquota, "prjquota"}, {Opt_err, NULL}, }; @@ -392,9 +394,13 @@ static int parse_options(struct super_block *sb, char *options) case Opt_grpquota: set_opt(sbi, GRPQUOTA); break; + case Opt_prjquota: + set_opt(sbi, PRJQUOTA); + break; #else case Opt_usrquota: case Opt_grpquota: + case Opt_prjquota: f2fs_msg(sb, KERN_INFO, "quota operations not supported"); break; @@ -814,6 +820,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",usrquota"); if (test_opt(sbi, GRPQUOTA)) seq_puts(seq, ",grpquota"); + if (test_opt(sbi, PRJQUOTA)) + seq_puts(seq, ",prjquota"); #endif return 0; @@ -1172,6 +1180,12 @@ static void f2fs_quota_off_umount(struct super_block *sb) f2fs_quota_off(sb, type); } +int f2fs_get_projid(struct inode *inode, kprojid_t *projid) +{ + *projid = F2FS_I(inode)->i_projid; + return 0; +} + static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, .write_dquot = dquot_commit, @@ -1181,6 +1195,7 @@ static const struct dquot_operations f2fs_quota_operations = { .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, + .get_projid = f2fs_get_projid, .get_next_id = dquot_get_next_id, }; @@ -1964,7 +1979,7 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; sb->s_qcop = &f2fs_quotactl_ops; - sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &f2fs_sops; -- cgit From dc6b20551044a05cd4d8ad2356a6bd888570f52a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Jul 2017 11:24:13 -0700 Subject: f2fs: avoid naming confusion of sysfs init This patch changes the function names of sysfs init to follow ext4. f2fs_init_sysfs <-> f2fs_register_sysfs f2fs_exit_sysfs <-> f2fs_unregister_sysfs Suggested-by: Chao Yu Reivewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++---- fs/f2fs/super.c | 12 ++++++------ fs/f2fs/sysfs.c | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d486539086b6..a1bfd0b9be44 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2820,10 +2820,10 @@ void destroy_extent_cache(void); /* * sysfs.c */ -int __init f2fs_register_sysfs(void); -void f2fs_unregister_sysfs(void); -int f2fs_init_sysfs(struct f2fs_sb_info *sbi); -void f2fs_exit_sysfs(struct f2fs_sb_info *sbi); +int __init f2fs_init_sysfs(void); +void f2fs_exit_sysfs(void); +int f2fs_register_sysfs(struct f2fs_sb_info *sbi); +void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); /* * crypto support diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1241739f4d54..f44a09375e64 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -649,7 +649,7 @@ static void f2fs_put_super(struct super_block *sb) kfree(sbi->ckpt); - f2fs_exit_sysfs(sbi); + f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) @@ -2145,7 +2145,7 @@ try_onemore: goto free_root_inode; } - err = f2fs_init_sysfs(sbi); + err = f2fs_register_sysfs(sbi); if (err) goto free_root_inode; @@ -2216,7 +2216,7 @@ skip_recovery: free_sysfs: f2fs_sync_inode_meta(sbi); - f2fs_exit_sysfs(sbi); + f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -2334,7 +2334,7 @@ static int __init init_f2fs_fs(void) err = create_extent_cache(); if (err) goto free_checkpoint_caches; - err = f2fs_register_sysfs(); + err = f2fs_init_sysfs(); if (err) goto free_extent_cache; err = register_shrinker(&f2fs_shrinker_info); @@ -2353,7 +2353,7 @@ free_filesystem: free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_sysfs: - f2fs_unregister_sysfs(); + f2fs_exit_sysfs(); free_extent_cache: destroy_extent_cache(); free_checkpoint_caches: @@ -2373,7 +2373,7 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); - f2fs_unregister_sysfs(); + f2fs_exit_sysfs(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 71191d89917d..5a78b9af92ef 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -304,7 +304,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); -int __init f2fs_register_sysfs(void) +int __init f2fs_init_sysfs(void) { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); @@ -314,13 +314,13 @@ int __init f2fs_register_sysfs(void) return 0; } -void f2fs_unregister_sysfs(void) +void f2fs_exit_sysfs(void) { kset_unregister(f2fs_kset); remove_proc_entry("fs/f2fs", NULL); } -int f2fs_init_sysfs(struct f2fs_sb_info *sbi) +int f2fs_register_sysfs(struct f2fs_sb_info *sbi) { struct super_block *sb = sbi->sb; int err; @@ -351,7 +351,7 @@ err_out: return err; } -void f2fs_exit_sysfs(struct f2fs_sb_info *sbi) +void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); -- cgit From b6a245eb34cd935f48235e1160d8b7539b228e8e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Jul 2017 02:29:12 -0700 Subject: f2fs: don't need to wait for node writes for atomic write We have a node chain to serialize node block writes, so if any IOs for node block writes are reordered, we'll get broken node chain. IOWs, roll-forward recovery will see all or none node blocks given fsync mark. E.g., Node chain consists of: N1 -> N2 -> N3 -> NFSYNC -> N1' -> N2' -> N'FSYNC Reordered to: 1) N1 -> N2 -> N3 -> N2' -> NFSYNC -> N'FSYNC -> power-cut 2) N1 -> N2 -> N3 -> N1' -> NFSYNC -> power-cut 3) N1 -> N2 -> NFSYNC -> N1' -> N'FSYNC -> N3 -> power-cut 4) N1 -> NFSYNC -> N1' -> N2' -> N'FSYNC -> N3 -> power-cut Roll-forward recovery can proceed to: 1) N1 -> N2 -> N3 -> NFSYNC -> X 2) N1 -> N2 -> N3 -> NFSYNC -> N1' -> X 3) N1 -> N2 -> N3 -> FSYNC -> N1' -> X 4) N1 -> X Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0413c951458..0246d19d96c3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -274,9 +274,19 @@ sync_nodes: goto sync_nodes; } - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + /* + * If it's atomic_write, it's just fine to keep write ordering. So + * here we don't need to wait for node write completion, since we use + * node chain which serializes node blocks. If one of node writes are + * reordered, we can see simply broken chain, resulting in stopping + * roll-forward recovery. It means we'll recover all or none node blocks + * given fsync mark. + */ + if (!atomic) { + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + } /* once recovery info is written, don't need to tack this */ remove_ino_entry(sbi, ino, APPEND_INO); -- cgit From 2c1d03056991286c689be3348fb8b844bcd20e23 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Jul 2017 00:32:52 +0800 Subject: f2fs: support F2FS_IOC_FS{GET,SET}XATTR This patch adds FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR ioctl interface support for f2fs. The interface is kept consistent with the one of ext4/xfs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 + fs/f2fs/file.c | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 239 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a1bfd0b9be44..5aedd609cceb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -337,6 +337,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif +#define F2FS_IOC_FSGETXATTR FS_IOC_FSGETXATTR +#define F2FS_IOC_FSSETXATTR FS_IOC_FSSETXATTR + struct f2fs_gc_range { u32 sync; u64 start; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0246d19d96c3..407518f42e45 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -662,7 +662,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int flags; - flags = fi->i_flags & FS_FL_USER_VISIBLE; + flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); if (flags & FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; if (flags & FS_COMPR_FL) @@ -1532,16 +1532,47 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; + unsigned int flags = fi->i_flags & + (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); return put_user(flags, (int __user *)arg); } +static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int oldflags; + + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; + + flags = f2fs_mask_flags(inode->i_mode, flags); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + flags = flags & (FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL); + flags |= oldflags & ~(FS_FL_USER_MODIFIABLE | FS_PROJINHERIT_FL); + fi->i_flags = flags; + + if (fi->i_flags & FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + else + clear_inode_flag(inode, FI_PROJ_INHERIT); + + inode->i_ctime = current_time(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, false); + return 0; +} + static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int flags; - unsigned int oldflags; int ret; if (!inode_owner_or_capable(inode)) @@ -1556,31 +1587,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode_lock(inode); - /* Is it quota file? Do not allow user to mess with it */ - if (IS_NOQUOTA(inode)) { - ret = -EPERM; - goto unlock_out; - } - - flags = f2fs_mask_flags(inode->i_mode, flags); - - oldflags = fi->i_flags; - - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; - goto unlock_out; - } - } + ret = __f2fs_ioc_setflags(inode, flags); - flags = flags & FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; - - inode->i_ctime = current_time(inode); - f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); -unlock_out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -2416,6 +2424,200 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) return put_user(sb_feature, (u32 __user *)arg); } +#ifdef CONFIG_QUOTA +static int f2fs_ioc_setproject(struct file *filp, __u32 projid) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + struct dquot *transfer_to[MAXQUOTAS] = {}; + struct page *ipage; + kprojid_t kprojid; + int err; + + if (!f2fs_sb_has_project_quota(sb)) { + if (projid != F2FS_DEF_PROJID) + return -EOPNOTSUPP; + else + return 0; + } + + if (!f2fs_has_extra_attr(inode)) + return -EOPNOTSUPP; + + kprojid = make_kprojid(&init_user_ns, (projid_t)projid); + + if (projid_eq(kprojid, F2FS_I(inode)->i_projid)) + return 0; + + err = mnt_want_write_file(filp); + if (err) + return err; + + err = -EPERM; + inode_lock(inode); + + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + goto out_unlock; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out_unlock; + } + + if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize, + i_projid)) { + err = -EOVERFLOW; + f2fs_put_page(ipage, 1); + goto out_unlock; + } + f2fs_put_page(ipage, 1); + + dquot_initialize(inode); + + transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); + if (!IS_ERR(transfer_to[PRJQUOTA])) { + err = __dquot_transfer(inode, transfer_to); + dqput(transfer_to[PRJQUOTA]); + if (err) + goto out_dirty; + } + + F2FS_I(inode)->i_projid = kprojid; + inode->i_ctime = current_time(inode); +out_dirty: + f2fs_mark_inode_dirty_sync(inode, true); +out_unlock: + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; +} +#else +static int f2fs_ioc_setproject(struct file *filp, __u32 projid) +{ + if (projid != F2FS_DEF_PROJID) + return -EOPNOTSUPP; + return 0; +} +#endif + +/* Transfer internal flags to xflags */ +static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) +{ + __u32 xflags = 0; + + if (iflags & FS_SYNC_FL) + xflags |= FS_XFLAG_SYNC; + if (iflags & FS_IMMUTABLE_FL) + xflags |= FS_XFLAG_IMMUTABLE; + if (iflags & FS_APPEND_FL) + xflags |= FS_XFLAG_APPEND; + if (iflags & FS_NODUMP_FL) + xflags |= FS_XFLAG_NODUMP; + if (iflags & FS_NOATIME_FL) + xflags |= FS_XFLAG_NOATIME; + if (iflags & FS_PROJINHERIT_FL) + xflags |= FS_XFLAG_PROJINHERIT; + return xflags; +} + +#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ + FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) + +/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ +#define F2FS_FL_XFLAG_VISIBLE (FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_PROJINHERIT_FL) + +/* Transfer xflags flags to internal */ +static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags) +{ + unsigned long iflags = 0; + + if (xflags & FS_XFLAG_SYNC) + iflags |= FS_SYNC_FL; + if (xflags & FS_XFLAG_IMMUTABLE) + iflags |= FS_IMMUTABLE_FL; + if (xflags & FS_XFLAG_APPEND) + iflags |= FS_APPEND_FL; + if (xflags & FS_XFLAG_NODUMP) + iflags |= FS_NODUMP_FL; + if (xflags & FS_XFLAG_NOATIME) + iflags |= FS_NOATIME_FL; + if (xflags & FS_XFLAG_PROJINHERIT) + iflags |= FS_PROJINHERIT_FL; + + return iflags; +} + +static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct fsxattr fa; + + memset(&fa, 0, sizeof(struct fsxattr)); + fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & + (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL)); + + if (f2fs_sb_has_project_quota(inode->i_sb)) + fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, + fi->i_projid); + + if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) + return -EFAULT; + return 0; +} + +static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct fsxattr fa; + unsigned int flags; + int err; + + if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa))) + return -EFAULT; + + /* Make sure caller has proper permission */ + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS) + return -EOPNOTSUPP; + + flags = f2fs_xflags_to_iflags(fa.fsx_xflags); + if (f2fs_mask_flags(inode->i_mode, flags) != flags) + return -EOPNOTSUPP; + + err = mnt_want_write_file(filp); + if (err) + return err; + + inode_lock(inode); + flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | + (flags & F2FS_FL_XFLAG_VISIBLE); + err = __f2fs_ioc_setflags(inode, flags); + inode_unlock(inode); + mnt_drop_write_file(filp); + if (err) + return err; + + err = f2fs_ioc_setproject(filp, fa.fsx_projid); + if (err) + return err; + + return 0; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -2459,6 +2661,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_flush_device(filp, arg); case F2FS_IOC_GET_FEATURES: return f2fs_ioc_get_features(filp, arg); + case F2FS_IOC_FSGETXATTR: + return f2fs_ioc_fsgetxattr(filp, arg); + case F2FS_IOC_FSSETXATTR: + return f2fs_ioc_fssetxattr(filp, arg); default: return -ENOTTY; } @@ -2525,6 +2731,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: + case F2FS_IOC_FSGETXATTR: + case F2FS_IOC_FSSETXATTR: break; default: return -ENOIOCTLCMD; -- cgit From ddc34e328d067b15b937dea7ad29034920130200 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Jul 2017 00:32:53 +0800 Subject: f2fs: introduce f2fs_statfs_project This patch introduces f2fs_statfs_project, it enables to show usage status of directory tree which is limited with project quota. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f44a09375e64..d53a08ecd4d4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -705,6 +705,48 @@ static int f2fs_unfreeze(struct super_block *sb) return 0; } +#ifdef CONFIG_QUOTA +static int f2fs_statfs_project(struct super_block *sb, + kprojid_t projid, struct kstatfs *buf) +{ + struct kqid qid; + struct dquot *dquot; + u64 limit; + u64 curblock; + + qid = make_kqid_projid(projid); + dquot = dqget(sb, qid); + if (IS_ERR(dquot)) + return PTR_ERR(dquot); + spin_lock(&dq_data_lock); + + limit = (dquot->dq_dqb.dqb_bsoftlimit ? + dquot->dq_dqb.dqb_bsoftlimit : + dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { + curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + buf->f_blocks = limit; + buf->f_bfree = buf->f_bavail = + (buf->f_blocks > curblock) ? + (buf->f_blocks - curblock) : 0; + } + + limit = dquot->dq_dqb.dqb_isoftlimit ? + dquot->dq_dqb.dqb_isoftlimit : + dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { + buf->f_files = limit; + buf->f_ffree = + (buf->f_files > dquot->dq_dqb.dqb_curinodes) ? + (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; + } + + spin_unlock(&dq_data_lock); + dqput(dquot); + return 0; +} +#endif + static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -740,6 +782,12 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); +#ifdef CONFIG_QUOTA + if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + sb_has_quota_limits_enabled(sb, PRJQUOTA)) { + f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + } +#endif return 0; } -- cgit From 401db79f61e0810e7c9fa3c7425c0aef84e5c30a Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 27 Jul 2017 20:11:00 +0800 Subject: f2fs: provide f2fs_balance_fs to __write_node_page Let node writeback also do f2fs_balance_fs to ensure there are always enough free segments. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/node.c | 16 ++++++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5b876f6d3f6b..3c84a2520796 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1017,7 +1017,7 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc); + err = sync_node_pages(sbi, &wbc, false); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5aedd609cceb..1c31e4e7bde6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2390,7 +2390,8 @@ struct page *get_node_page_ra(struct page *parent, int start); void move_node_page(struct page *node_page, int gc_type); int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic); -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc); +int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, + bool do_balance); void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 11e2d49203b3..14b79a396a3f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1327,7 +1327,7 @@ continue_unlock: } static int __write_node_page(struct page *page, bool atomic, bool *submitted, - struct writeback_control *wbc) + struct writeback_control *wbc, bool do_balance) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; @@ -1396,6 +1396,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (submitted) *submitted = fio.submitted; + if (do_balance) + f2fs_balance_fs(sbi, false); return 0; redirty_out: @@ -1406,7 +1408,7 @@ redirty_out: static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc); + return __write_node_page(page, false, NULL, wbc, false); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1494,7 +1496,7 @@ continue_unlock: ret = __write_node_page(page, atomic && page == last_page, - &submitted, wbc); + &submitted, wbc, true); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); @@ -1531,7 +1533,8 @@ out: return ret ? -EIO: 0; } -int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) +int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, + bool do_balance) { pgoff_t index, end; struct pagevec pvec; @@ -1609,7 +1612,8 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); - ret = __write_node_page(page, false, &submitted, wbc); + ret = __write_node_page(page, false, &submitted, + wbc, do_balance); if (ret) unlock_page(page); else if (submitted) @@ -1698,7 +1702,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc); + sync_node_pages(sbi, wbc, true); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; -- cgit From 4f31d26b0c17f2aae6a6afeb823a87e20671ab4b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 30 Jul 2017 09:45:14 -0700 Subject: f2fs: return wrong error number on f2fs_quota_write This must return size, not error number. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d53a08ecd4d4..4c28576c5b30 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1122,7 +1122,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, } if (len == towrite) - return err; + return 0; inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); -- cgit From 704956ecf5bcdc14d14650f39f2b545b34c96265 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 31 Jul 2017 20:19:09 +0800 Subject: f2fs: support inode checksum This patch adds to support inode checksum in f2fs. Signed-off-by: Chao Yu [Jaegeuk Kim: fix verification flow] Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 32 +++++++++++++++++++++++++ fs/f2fs/inode.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/node.c | 7 ++++++ fs/f2fs/segment.c | 5 +++- fs/f2fs/super.c | 5 ++++ 5 files changed, 118 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1c31e4e7bde6..7c1244ba92a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -116,6 +116,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 +#define F2FS_FEATURE_INODE_CHKSUM 0x0020 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -1085,6 +1086,9 @@ struct f2fs_sb_info { /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; + /* Precomputed FS UUID checksum for seeding other checksums */ + __u32 s_chksum_seed; + /* For fault injection */ #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info fault_info; @@ -1176,6 +1180,27 @@ static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, return f2fs_crc32(sbi, buf, buf_size) == blk_crc; } +static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[4]; + } desc; + int err; + + BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); + + desc.shash.tfm = sbi->s_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) { return container_of(inode, struct f2fs_inode_info, vfs_inode); @@ -2285,6 +2310,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); * inode.c */ void f2fs_set_inode_flags(struct inode *inode); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); @@ -2869,6 +2896,11 @@ static inline int f2fs_sb_has_project_quota(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); } +static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f15e663a1a15..b4c401d456e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -108,6 +108,76 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) return; } +static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + int extra_isize = le32_to_cpu(ri->i_extra_isize); + + if (!f2fs_sb_has_inode_chksum(sbi->sb)) + return false; + + if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + return false; + + if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum)) + return false; + + return true; +} + +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_inode *ri = &node->i; + __le32 ino = node->footer.ino; + __le32 gen = ri->i_generation; + __u32 chksum, chksum_seed; + __u32 dummy_cs = 0; + unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum); + unsigned int cs_size = sizeof(dummy_cs); + + chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino, + sizeof(ino)); + chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen)); + + chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size); + offset += cs_size; + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset, + F2FS_BLKSIZE - offset); + return chksum; +} + +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri; + __u32 provided, calculated; + + if (!f2fs_enable_inode_chksum(sbi, page)) + return true; + + ri = &F2FS_NODE(page)->i; + provided = le32_to_cpu(ri->i_inode_checksum); + calculated = f2fs_inode_chksum(sbi, page); + + if (provided != calculated) + f2fs_msg(sbi->sb, KERN_WARNING, + "checksum invalid, ino = %x, %x vs. %x", + ino_of_node(page), provided, calculated); + + return provided == calculated; +} + +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + + if (!f2fs_enable_inode_chksum(sbi, page)) + return; + + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 14b79a396a3f..0176035d8ced 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1171,6 +1171,11 @@ repeat: err = -EIO; goto out_err; } + + if (!f2fs_inode_chksum_verify(sbi, page)) { + err = -EBADMSG; + goto out_err; + } page_hit: if(unlikely(nid != nid_of_node(page))) { f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " @@ -2275,6 +2280,8 @@ retry: F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + f2fs_inode_chksum_set(sbi, ipage); } new_ni = old_ni; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a5672a20a62..8911f50ddef6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2214,9 +2214,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) + if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + f2fs_inode_chksum_set(sbi, page); + } + if (add_list) { struct f2fs_bio_info *io; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4c28576c5b30..fc757c8861b7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1993,6 +1993,11 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + /* precompute checksum seed for metadata */ + if (f2fs_sb_has_inode_chksum(sb)) + sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, + sizeof(raw_super->uuid)); + /* * The BLKZONED feature indicates that the drive was formatted with * zone alignment optimization. This is optional for host-aware -- cgit From bf9e697ecd4214c86ff35764449348db45c697b1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Jul 2017 17:14:09 -0700 Subject: f2fs: expose features to sysfs entry This patch exposes what features are supported by current f2fs build to sysfs entry via: /sys/fs/f2fs/features/ /sys/fs/f2fs/dev/features Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 130 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5a78b9af92ef..1e31d0c5b6ab 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -18,7 +18,6 @@ #include "gc.h" static struct proc_dir_entry *f2fs_proc_root; -static struct kset *f2fs_kset; /* Sysfs support for f2fs */ enum { @@ -41,6 +40,7 @@ struct f2fs_attr { const char *, size_t); int struct_type; int offset; + int id; }; static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) @@ -76,6 +76,34 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, BD_PART_WRITTEN(sbi))); } +static ssize_t features_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->sb; + int len = 0; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + if (f2fs_sb_has_crypto(sb)) + len += snprintf(buf, PAGE_SIZE - len, "%s", + "encryption"); + if (f2fs_sb_mounted_blkzoned(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "blkzoned"); + if (f2fs_sb_has_extra_attr(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "extra_attr"); + if (f2fs_sb_has_project_quota(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "projquota"); + if (f2fs_sb_has_inode_chksum(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "inode_checksum"); + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + return len; +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -155,6 +183,30 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +enum feat_id { + FEAT_CRYPTO = 0, + FEAT_BLKZONED, + FEAT_ATOMIC_WRITE, + FEAT_EXTRA_ATTR, + FEAT_PROJECT_QUOTA, + FEAT_INODE_CHECKSUM, +}; + +static ssize_t f2fs_feature_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + switch (a->id) { + case FEAT_CRYPTO: + case FEAT_BLKZONED: + case FEAT_ATOMIC_WRITE: + case FEAT_EXTRA_ATTR: + case FEAT_PROJECT_QUOTA: + case FEAT_INODE_CHECKSUM: + return snprintf(buf, PAGE_SIZE, "supported\n"); + } + return 0; +} + #define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ static struct f2fs_attr f2fs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ @@ -172,6 +224,13 @@ static struct f2fs_attr f2fs_attr_##_name = { \ #define F2FS_GENERAL_RO_ATTR(name) \ static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) +#define F2FS_FEATURE_RO_ATTR(_name, _id) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_feature_show, \ + .id = _id, \ +} + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); @@ -196,6 +255,18 @@ F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); +F2FS_GENERAL_RO_ATTR(features); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); +#endif +#ifdef CONFIG_BLK_DEV_ZONED +F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED); +#endif +F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); +F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); +F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); +F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -222,21 +293,53 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(features), ATTR_LIST(reserved_blocks), NULL, }; +static struct attribute *f2fs_feat_attrs[] = { +#ifdef CONFIG_F2FS_FS_ENCRYPTION + ATTR_LIST(encryption), +#endif +#ifdef CONFIG_BLK_DEV_ZONED + ATTR_LIST(block_zoned), +#endif + ATTR_LIST(atomic_write), + ATTR_LIST(extra_attr), + ATTR_LIST(project_quota), + ATTR_LIST(inode_checksum), + NULL, +}; + static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, .store = f2fs_attr_store, }; -static struct kobj_type f2fs_ktype = { +static struct kobj_type f2fs_sb_ktype = { .default_attrs = f2fs_attrs, .sysfs_ops = &f2fs_attr_ops, .release = f2fs_sb_release, }; +static struct kobj_type f2fs_ktype = { + .sysfs_ops = &f2fs_attr_ops, +}; + +static struct kset f2fs_kset = { + .kobj = {.ktype = &f2fs_ktype}, +}; + +static struct kobj_type f2fs_feat_ktype = { + .default_attrs = f2fs_feat_attrs, + .sysfs_ops = &f2fs_attr_ops, +}; + +static struct kobject f2fs_feat = { + .kset = &f2fs_kset, +}; + static int segment_info_seq_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; @@ -306,18 +409,29 @@ F2FS_PROC_FILE_DEF(segment_bits); int __init f2fs_init_sysfs(void) { - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + int ret; - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) - return -ENOMEM; - return 0; + kobject_set_name(&f2fs_kset.kobj, "f2fs"); + f2fs_kset.kobj.parent = fs_kobj; + ret = kset_register(&f2fs_kset); + if (ret) + return ret; + + ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, + NULL, "features"); + if (ret) + kset_unregister(&f2fs_kset); + else + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return ret; } void f2fs_exit_sysfs(void) { - kset_unregister(f2fs_kset); + kobject_put(&f2fs_feat); + kset_unregister(&f2fs_kset); remove_proc_entry("fs/f2fs", NULL); + f2fs_proc_root = NULL; } int f2fs_register_sysfs(struct f2fs_sb_info *sbi) @@ -325,6 +439,13 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) struct super_block *sb = sbi->sb; int err; + sbi->s_kobj.kset = &f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + return err; + if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -334,32 +455,15 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_bits_fops, sb); } - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); - if (err) - goto err_out; return 0; -err_out: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - return err; } void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); - if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } + kobject_del(&sbi->s_kobj); } -- cgit From a36c106dffb616250117efb1cab271c19a8f94ff Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 2 Aug 2017 20:58:29 -0700 Subject: f2fs: use printk_ratelimited for f2fs_msg This patch reduces contention of printks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc757c8861b7..b84367856f8e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -159,7 +159,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } -- cgit From 6415fedc572219e0c8e6b4d3c17c46ed36997ae7 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 21:20:13 +0800 Subject: f2fs: update cur_valid_map_mir together with cur_valid_map When cur_valid_map passes the f2fs_test_and_set(,clear)_bit test, cur_valid_map_mir update is skipped unlikely, so fix it. The fix now changes the mirror check together with cur_valid_map all the time. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu [Jaegeuk Kim: Fix unused variable and add unlikely for corner condition.] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8911f50ddef6..7781203a4077 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1503,6 +1503,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) struct seg_entry *se; unsigned int segno, offset; long int new_vblocks; + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif segno = GET_SEGNO(sbi, blkaddr); @@ -1519,17 +1523,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; @@ -1540,17 +1550,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) se->ckpt_valid_blocks++; } } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(!exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; -- cgit From 35ee82ca133a58011e648a407d4ab13275c975d4 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 22:16:54 +0800 Subject: f2fs: do not change the valid_block value if cur_valid_map was wrongly set or cleared Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7781203a4077..1675a2dcd599 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1538,6 +1538,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly set, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; } if (f2fs_discard_en(sbi) && @@ -1565,6 +1567,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly cleared, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; } if (f2fs_discard_en(sbi) && -- cgit From b0af6d491a6b5f5622fa91ac75f34f3640f862c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Aug 2017 23:21:48 +0800 Subject: f2fs: add app/fs io stat This patch enables inner app/fs io stats and introduces below virtual fs nodes for exposing stats info: /sys/fs/f2fs//iostat_enable /proc/fs/f2fs//iostat_info Signed-off-by: Chao Yu [Jaegeuk Kim: fix wrong stat assignment] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 34 +++++++++++++++++++++--------- fs/f2fs/data.c | 35 +++++++++++++++++++++++-------- fs/f2fs/f2fs.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++--- fs/f2fs/file.c | 7 ++++++- fs/f2fs/gc.c | 3 +++ fs/f2fs/inline.c | 1 + fs/f2fs/node.c | 15 +++++++------ fs/f2fs/segment.c | 21 +++++++++++++++++-- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 200 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c84a2520796..da5b49183e09 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -230,8 +230,9 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) +static int __f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc, + enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -244,7 +245,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - write_meta_page(sbi, page); + write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) @@ -263,6 +264,12 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + return __f2fs_write_meta_page(page, wbc, FS_META_IO); +} + static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { @@ -283,7 +290,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write); + written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -295,7 +302,7 @@ skip_write: } long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write) + long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; @@ -346,7 +353,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (mapping->a_ops->writepage(page, &wbc)) { + if (__f2fs_write_meta_page(page, &wbc, io_type)) { unlock_page(page); break; } @@ -904,7 +911,14 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; + if (is_dir) + F2FS_I(inode)->cp_task = current; + filemap_fdatawrite(inode->i_mapping); + + if (is_dir) + F2FS_I(inode)->cp_task = NULL; + iput(inode); /* We need to give cpu to another writers. */ if (ino == cur_ino) { @@ -1017,7 +1031,7 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc, false); + err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1115,7 +1129,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) return -EIO; } @@ -1194,7 +1208,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT BITS pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) return -EIO; } @@ -1249,7 +1263,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) percpu_counter_set(&sbi->alloc_valid_block_count, 0); /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aefc2a5745d3..c43262dc36de 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1475,7 +1475,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, - struct writeback_control *wbc) + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1496,6 +1497,7 @@ static int __write_data_page(struct page *page, bool *submitted, .encrypted_page = NULL, .submitted = false, .need_lock = LOCK_RETRY, + .io_type = io_type, }; trace_f2fs_writepage(page, DATA); @@ -1602,7 +1604,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc); + return __write_data_page(page, NULL, wbc, FS_DATA_IO); } /* @@ -1611,7 +1613,8 @@ static int f2fs_write_data_page(struct page *page, * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc) + struct writeback_control *wbc, + enum iostat_type io_type) { int ret = 0; int done = 0; @@ -1701,7 +1704,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc); + ret = __write_data_page(page, &submitted, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -1756,8 +1759,9 @@ continue_unlock: return ret; } -static int f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc) +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1794,7 +1798,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, goto skip_write; blk_start_plug(&plug); - ret = f2fs_write_cache_pages(mapping, wbc); + ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); if (wbc->sync_mode == WB_SYNC_ALL) @@ -1813,6 +1817,16 @@ skip_write: return 0; } +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + + return __f2fs_write_data_pages(mapping, wbc, + F2FS_I(inode)->cp_task == current ? + FS_CP_DATA_IO : FS_DATA_IO); +} + static void f2fs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; @@ -2079,10 +2093,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { - if (err > 0) + if (err > 0) { + f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, + err); set_inode_flag(inode, FI_UPDATE_WRITE); - else if (err < 0) + } else if (err < 0) { f2fs_write_failed(mapping, offset + count); + } } trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7c1244ba92a8..0ccccbfdeeab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -557,6 +557,7 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ + struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ @@ -863,6 +864,23 @@ enum need_lock_type { LOCK_RETRY, }; +enum iostat_type { + APP_DIRECT_IO, /* app direct IOs */ + APP_BUFFERED_IO, /* app buffered IOs */ + APP_WRITE_IO, /* app write IOs */ + APP_MAPPED_IO, /* app mapped IOs */ + FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ + FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */ + FS_META_IO, /* meta IOs from kworker/reclaimer */ + FS_GC_DATA_IO, /* data IOs from forground gc */ + FS_GC_NODE_IO, /* node IOs from forground gc */ + FS_CP_DATA_IO, /* data IOs from checkpoint */ + FS_CP_NODE_IO, /* node IOs from checkpoint */ + FS_CP_META_IO, /* meta IOs from checkpoint */ + FS_DISCARD, /* discard */ + NR_IO_TYPE, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ @@ -877,6 +895,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + enum iostat_type io_type; /* io type */ }; #define is_read_io(rw) ((rw) == READ) @@ -1068,6 +1087,11 @@ struct f2fs_sb_info { #endif spinlock_t stat_lock; /* lock for stat operations */ + /* For app/fs IO statistics */ + spinlock_t iostat_lock; + unsigned long long write_iostat[NR_IO_TYPE]; + bool iostat_enable; + /* For sysfs suppport */ struct kobject s_kobj; struct completion s_kobj_unregister; @@ -2291,6 +2315,31 @@ static inline int get_extra_isize(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ +static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) +{ + int i; + + spin_lock(&sbi->iostat_lock); + for (i = 0; i < NR_IO_TYPE; i++) + sbi->write_iostat[i] = 0; + spin_unlock(&sbi->iostat_lock); +} + +static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes) +{ + if (!sbi->iostat_enable) + return; + spin_lock(&sbi->iostat_lock); + sbi->write_iostat[type] += io_bytes; + + if (type == APP_WRITE_IO || type == APP_DIRECT_IO) + sbi->write_iostat[APP_BUFFERED_IO] = + sbi->write_iostat[APP_WRITE_IO] - + sbi->write_iostat[APP_DIRECT_IO]; + spin_unlock(&sbi->iostat_lock); +} + /* * file.c */ @@ -2418,7 +2467,7 @@ void move_node_page(struct page *node_page, int gc_type); int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic); int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, - bool do_balance); + bool do_balance, enum iostat_type io_type); void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); @@ -2461,7 +2510,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type); void write_node_page(unsigned int nid, struct f2fs_io_info *fio); void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); int rewrite_data_page(struct f2fs_io_info *fio); @@ -2502,7 +2552,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write); + long nr_to_write, enum iostat_type io_type); void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void release_ino_entry(struct f2fs_sb_info *sbi, bool all); @@ -2555,6 +2605,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); void f2fs_set_page_dirty_nobuffers(struct page *page); +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 407518f42e45..e2b33b87701f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -98,6 +98,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (!PageUptodate(page)) SetPageUptodate(page); + f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ @@ -1815,7 +1817,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); break; default: @@ -2694,6 +2696,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f57cadae1a30..620dca443b29 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -689,6 +689,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); + f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); + f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) @@ -736,6 +738,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .page = page, .encrypted_page = NULL, .need_lock = LOCK_REQ, + .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); int err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f38be791fdf9..e63ab0d1f614 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -117,6 +117,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, + .io_type = FS_DATA_IO, }; int dirty, err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0176035d8ced..9168c304fd58 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1332,7 +1332,8 @@ continue_unlock: } static int __write_node_page(struct page *page, bool atomic, bool *submitted, - struct writeback_control *wbc, bool do_balance) + struct writeback_control *wbc, bool do_balance, + enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; @@ -1345,6 +1346,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .io_type = io_type, }; trace_f2fs_writepage(page, NODE); @@ -1413,7 +1415,7 @@ redirty_out: static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc, false); + return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1501,7 +1503,8 @@ continue_unlock: ret = __write_node_page(page, atomic && page == last_page, - &submitted, wbc, true); + &submitted, wbc, true, + FS_NODE_IO); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); @@ -1539,7 +1542,7 @@ out: } int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, - bool do_balance) + bool do_balance, enum iostat_type io_type) { pgoff_t index, end; struct pagevec pvec; @@ -1618,7 +1621,7 @@ continue_unlock: set_dentry_mark(page, 0); ret = __write_node_page(page, false, &submitted, - wbc, do_balance); + wbc, do_balance, io_type); if (ret) unlock_page(page); else if (submitted) @@ -1707,7 +1710,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc, true); + sync_node_pages(sbi, wbc, true, FS_NODE_IO); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1675a2dcd599..c6e4a1174f8a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -292,6 +292,7 @@ static int __commit_inmem_pages(struct inode *inode, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, + .io_type = FS_DATA_IO, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -823,6 +824,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, submit_bio(bio); list_move_tail(&dc->list, &dcc->wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); } } else { __remove_discard_cmd(sbi, dc); @@ -2271,7 +2274,8 @@ reallocate: } } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, @@ -2290,6 +2294,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) set_page_writeback(page); f2fs_submit_page_write(&fio); + + f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2298,6 +2304,8 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) @@ -2311,13 +2319,22 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); + + f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } int rewrite_data_page(struct f2fs_io_info *fio) { + int err; + fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - return f2fs_submit_page_bio(fio); + + err = f2fs_submit_page_bio(fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + + return err; } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b84367856f8e..ca340d84f8c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2056,6 +2056,10 @@ try_onemore: set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); + /* init iostat info */ + spin_lock_init(&sbi->iostat_lock); + sbi->iostat_enable = false; + for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 1: NR_TEMP_TYPE; int j; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 1e31d0c5b6ab..3d6bbdb743b0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -153,6 +153,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, return count; } *ui = t; + + if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) + f2fs_reset_iostat(sbi); + return count; } @@ -250,6 +254,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -288,6 +293,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(iostat_enable), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), @@ -391,6 +397,48 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) return 0; } +static int iostat_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + time64_t now = ktime_get_real_seconds(); + + if (!sbi->iostat_enable) + return 0; + + seq_printf(seq, "time: %-16llu\n", now); + + /* print app IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->write_iostat[APP_BUFFERED_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->write_iostat[APP_DIRECT_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->write_iostat[APP_MAPPED_IO]); + + /* print fs IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->write_iostat[FS_DATA_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->write_iostat[FS_NODE_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->write_iostat[FS_META_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->write_iostat[FS_GC_DATA_IO]); + seq_printf(seq, "fs gc node: %-16llu\n", + sbi->write_iostat[FS_GC_NODE_IO]); + seq_printf(seq, "fs cp data: %-16llu\n", + sbi->write_iostat[FS_CP_DATA_IO]); + seq_printf(seq, "fs cp node: %-16llu\n", + sbi->write_iostat[FS_CP_NODE_IO]); + seq_printf(seq, "fs cp meta: %-16llu\n", + sbi->write_iostat[FS_CP_META_IO]); + seq_printf(seq, "fs discard: %-16llu\n", + sbi->write_iostat[FS_DISCARD]); + + return 0; +} + #define F2FS_PROC_FILE_DEF(_name) \ static int _name##_open_fs(struct inode *inode, struct file *file) \ { \ @@ -406,6 +454,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); +F2FS_PROC_FILE_DEF(iostat_info); int __init f2fs_init_sysfs(void) { @@ -454,6 +503,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) &f2fs_seq_segment_info_fops, sb); proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_bits_fops, sb); + proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_iostat_info_fops, sb); } return 0; } @@ -461,6 +512,7 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { if (sbi->s_proc) { + remove_proc_entry("iostat_info", sbi->s_proc); remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); -- cgit From 008396e1b026b3873091b555b808155da7d9d18f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 4 Aug 2017 17:07:15 +0800 Subject: f2fs: fix the size value in __check_sit_bitmap The current size value is not correct and will miss bitmap check. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c6e4a1174f8a..95267630c8c4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -787,11 +787,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); offset = GET_BLKOFF_FROM_SEG0(sbi, blk); - size = min((unsigned long)(end - blk), max_blocks); + if (end < START_BLOCK(sbi, segno + 1)) + size = GET_BLKOFF_FROM_SEG0(sbi, end); + else + size = max_blocks; map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); - blk += size; + blk = START_BLOCK(sbi, segno + 1); } #endif } -- cgit From 3537581a722402a1e6f0942cd0653035a9e09e21 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 5 Aug 2017 14:25:08 -0700 Subject: f2fs: use IPU for cold files We expect cold files write data sequentially, but sometimes some of small data can be updated, which incurs fragmentation. Let's avoid that. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 6b871b492fd5..7f700e54b77d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -577,6 +577,10 @@ static inline bool need_inplace_update_policy(struct inode *inode, if (test_opt(sbi, LFS)) return false; + /* if this is cold file, we should overwrite to avoid fragmentation */ + if (file_is_cold(inode)) + return true; + if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) -- cgit From d9872a698c393e0d1abca86bf05b62712cbfc581 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 6 Aug 2017 22:09:00 -0700 Subject: f2fs: introduce gc_urgent mode for background GC This patch adds a sysfs entry to control urgent mode for background GC. If this is set, background GC thread conducts GC with gc_urgent_sleep_time all the time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 17 +++++++++++++++-- fs/f2fs/gc.h | 4 ++++ fs/f2fs/sysfs.c | 9 +++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 620dca443b29..8da7c14a9d29 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -35,9 +35,14 @@ static int gc_thread_func(void *data) set_freezable(); do { wait_event_interruptible_timeout(*wq, - kthread_should_stop() || freezing(current), + kthread_should_stop() || freezing(current) || + gc_th->gc_wake, msecs_to_jiffies(wait_ms)); + /* give it a try one time */ + if (gc_th->gc_wake) + gc_th->gc_wake = 0; + if (try_to_freeze()) continue; if (kthread_should_stop()) @@ -74,6 +79,11 @@ static int gc_thread_func(void *data) if (!mutex_trylock(&sbi->gc_mutex)) goto next; + if (gc_th->gc_urgent) { + wait_ms = gc_th->urgent_sleep_time; + goto do_gc; + } + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); @@ -84,7 +94,7 @@ static int gc_thread_func(void *data) decrease_sleep_time(gc_th, &wait_ms); else increase_sleep_time(gc_th, &wait_ms); - +do_gc: stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ @@ -115,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; gc_th->gc_idle = 0; + gc_th->gc_urgent = 0; + gc_th->gc_wake= 0; sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a993967dcdb9..57a9000ce3af 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,6 +13,7 @@ * whether IO subsystem is idle * or not */ +#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */ #define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ @@ -27,12 +28,15 @@ struct f2fs_gc_kthread { wait_queue_head_t gc_wait_queue_head; /* for gc sleep time */ + unsigned int urgent_sleep_time; unsigned int min_sleep_time; unsigned int max_sleep_time; unsigned int no_gc_sleep_time; /* for changing gc mode */ unsigned int gc_idle; + unsigned int gc_urgent; + unsigned int gc_wake; }; struct gc_inode_list { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3d6bbdb743b0..c40e5d24df9f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -156,6 +156,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); + if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); + } return count; } @@ -235,10 +239,13 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .id = _id, \ } +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, + urgent_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -275,10 +282,12 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_urgent_sleep_time), ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), + ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), ATTR_LIST(batched_trim_sections), -- cgit From 9a20d391cd099d547c0f17626bf6f13e06da519a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 16:37:59 +0800 Subject: f2fs: avoid unneeded sync on quota file We only need to sync quota file with appointed quota type instead of all types in f2fs_quota_{on,off}. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ca340d84f8c2..54b8ff4c6fc9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1176,7 +1176,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, struct inode *inode; int err; - err = f2fs_quota_sync(sb, -1); + err = f2fs_quota_sync(sb, type); if (err) return err; @@ -1204,7 +1204,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, -1); + f2fs_quota_sync(sb, type); err = dquot_quota_off(sb, type); if (err) -- cgit From b8c502b81e3f899c6488967dec61eed0f5907db3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 23:12:46 +0800 Subject: f2fs: fix potential overflow when adjusting GC cycle While comparing signed and unsigned variables, compiler will converts the signed value to unsigned one, due to this reason, {in,de}crease_sleep_time may return overflowed result. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- fs/f2fs/gc.h | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8da7c14a9d29..e60480f71bb5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -28,7 +28,7 @@ static int gc_thread_func(void *data) struct f2fs_sb_info *sbi = data; struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; - long wait_ms; + unsigned int wait_ms; wait_ms = gc_th->min_sleep_time; diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 57a9000ce3af..9325191fab2d 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -69,25 +69,32 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) } static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, - long *wait) + unsigned int *wait) { + unsigned int min_time = gc_th->min_sleep_time; + unsigned int max_time = gc_th->max_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) return; - *wait += gc_th->min_sleep_time; - if (*wait > gc_th->max_sleep_time) - *wait = gc_th->max_sleep_time; + if ((long long)*wait + (long long)min_time > (long long)max_time) + *wait = max_time; + else + *wait += min_time; } static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, - long *wait) + unsigned int *wait) { + unsigned int min_time = gc_th->min_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) *wait = gc_th->max_sleep_time; - *wait -= gc_th->min_sleep_time; - if (*wait <= gc_th->min_sleep_time) - *wait = gc_th->min_sleep_time; + if ((long long)*wait - (long long)min_time < (long long)min_time) + *wait = min_time; + else + *wait -= min_time; } static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) -- cgit From 4b2414d04e99120ce852ba15a1926c9c3a77d9ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2017 10:54:31 +0800 Subject: f2fs: support journalled quota This patch supports to enable f2fs to accept quota information through mount option: - {usr,grp,prj}jquota= - jqfmt= Then, in ->mount flow, we can recover quota file during log replaying, by this, journelled quota can be supported. Signed-off-by: Chao Yu [Jaegeuk Kim: Fix wrong return values.] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 26 +++- fs/f2fs/f2fs.h | 9 ++ fs/f2fs/recovery.c | 72 ++++++++++-- fs/f2fs/super.c | 326 ++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 403 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index da5b49183e09..04fe1df052b2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -588,11 +588,24 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) int recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; - int err; + unsigned int s_flags = sbi->sb->s_flags; + int err = 0; if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; + if (s_flags & MS_RDONLY) { + f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + sbi->sb->s_flags &= ~MS_RDONLY; + } + +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + f2fs_enable_quota_files(sbi); +#endif + start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); @@ -608,14 +621,21 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) err = recover_orphan_inode(sbi, ino); if (err) { f2fs_put_page(page, 1); - return err; + goto out; } } f2fs_put_page(page, 1); } /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); - return 0; +out: +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + f2fs_quota_off_umount(sbi->sb); +#endif + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + + return err; } static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0ccccbfdeeab..dddbe9269ceb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -92,6 +92,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 +#define F2FS_MOUNT_QUOTA 0x00400000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1117,6 +1118,12 @@ struct f2fs_sb_info { #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info fault_info; #endif + +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -2429,6 +2436,8 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); +void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); extern __printf(3, 4) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2d9b8182691f..a3d02613934a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -69,20 +69,34 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, } static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, - struct list_head *head, nid_t ino) + struct list_head *head, nid_t ino, bool quota_inode) { struct inode *inode; struct fsync_inode_entry *entry; + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); + err = dquot_initialize(inode); + if (err) + goto err_out; + + if (quota_inode) { + err = dquot_alloc_inode(inode); + if (err) + goto err_out; + } + entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); entry->inode = inode; list_add_tail(&entry->list, head); return entry; +err_out: + iput(inode); + return ERR_PTR(err); } static void del_fsync_inode(struct fsync_inode_entry *entry) @@ -107,7 +121,8 @@ static int recover_dentry(struct inode *inode, struct page *ipage, entry = get_fsync_inode(dir_list, pino); if (!entry) { - entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino); + entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, + pino, false); if (IS_ERR(entry)) { dir = ERR_CAST(entry); err = PTR_ERR(entry); @@ -140,6 +155,13 @@ retry: err = -EEXIST; goto out_unmap_put; } + + err = dquot_initialize(einode); + if (err) { + iput(einode); + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); @@ -226,18 +248,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) { + bool quota_inode = false; + if (!check_only && IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) break; + quota_inode = true; } /* * CP | dnode(F) | inode(DF) * For this case, we should not give up now. */ - entry = add_fsync_inode(sbi, head, ino_of_node(page)); + entry = add_fsync_inode(sbi, head, ino_of_node(page), + quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); if (err == -ENOENT) { @@ -328,10 +354,18 @@ got_it: f2fs_put_page(node_page, 1); if (ino != dn->inode->i_ino) { + int ret; + /* Deallocate previous index in the node page */ inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return PTR_ERR(inode); + + ret = dquot_initialize(inode); + if (ret) { + iput(inode); + return ret; + } } else { inode = dn->inode; } @@ -558,12 +592,27 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) struct list_head dir_list; int err; int ret = 0; + unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; + if (s_flags & MS_RDONLY) { + f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + sbi->sb->s_flags &= ~MS_RDONLY; + } + +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + f2fs_enable_quota_files(sbi); +#endif + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) - return -ENOMEM; + if (!fsync_entry_slab) { + err = -ENOMEM; + goto out; + } INIT_LIST_HEAD(&inode_list); INIT_LIST_HEAD(&dir_list); @@ -574,11 +623,11 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); if (err || list_empty(&inode_list)) - goto out; + goto skip; if (check_only) { ret = 1; - goto out; + goto skip; } need_writecp = true; @@ -587,7 +636,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) err = recover_data(sbi, &inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); -out: +skip: destroy_fsync_dnodes(&inode_list); /* truncate meta pages to be used by the recovery */ @@ -615,5 +664,12 @@ out: } kmem_cache_destroy(fsync_entry_slab); +out: +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + f2fs_quota_off_umount(sbi->sb); +#endif + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return ret ? ret: err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 54b8ff4c6fc9..52083d662197 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -107,9 +108,20 @@ enum { Opt_fault_injection, Opt_lazytime, Opt_nolazytime, + Opt_quota, + Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, + Opt_usrjquota, + Opt_grpjquota, + Opt_prjjquota, + Opt_offusrjquota, + Opt_offgrpjquota, + Opt_offprjjquota, + Opt_jqfmt_vfsold, + Opt_jqfmt_vfsv0, + Opt_jqfmt_vfsv1, Opt_err, }; @@ -145,9 +157,20 @@ static match_table_t f2fs_tokens = { {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, + {Opt_quota, "quota"}, + {Opt_noquota, "noquota"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, {Opt_prjquota, "prjquota"}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_prjjquota, "prjjquota=%s"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_offprjjquota, "prjjquota="}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_err, NULL}, }; @@ -170,6 +193,104 @@ static void init_once(void *foo) inode_init_once(&fi->vfs_inode); } +#ifdef CONFIG_QUOTA +static const char * const quotatypes[] = INITQFNAMES; +#define QTYPE2NAME(t) (quotatypes[t]) +static int f2fs_set_qf_name(struct super_block *sb, int qtype, + substring_t *args) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + char *qname; + int ret = -EINVAL; + + if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + f2fs_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return -EINVAL; + } + qname = match_strdup(args); + if (!qname) { + f2fs_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return -EINVAL; + } + if (sbi->s_qf_names[qtype]) { + if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + ret = 0; + else + f2fs_msg(sb, KERN_ERR, + "%s quota file already specified", + QTYPE2NAME(qtype)); + goto errout; + } + if (strchr(qname, '/')) { + f2fs_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + goto errout; + } + sbi->s_qf_names[qtype] = qname; + set_opt(sbi, QUOTA); + return 0; +errout: + kfree(qname); + return ret; +} + +static int f2fs_clear_qf_name(struct super_block *sb, int qtype) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return -EINVAL; + } + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; +} + +static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) +{ + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " + "Cannot enable project quota enforcement."); + return -1; + } + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || + sbi->s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi, USRQUOTA); + + if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi, GRPQUOTA); + + if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + clear_opt(sbi, PRJQUOTA); + + if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || + test_opt(sbi, PRJQUOTA)) { + f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " + "format mixing"); + return -1; + } + + if (!sbi->s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " + "not specified"); + return -1; + } + } + return 0; +} +#endif + static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -177,6 +298,9 @@ static int parse_options(struct super_block *sb, char *options) substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; +#ifdef CONFIG_QUOTA + int ret; +#endif if (!options) return 0; @@ -388,6 +512,7 @@ static int parse_options(struct super_block *sb, char *options) sb->s_flags &= ~MS_LAZYTIME; break; #ifdef CONFIG_QUOTA + case Opt_quota: case Opt_usrquota: set_opt(sbi, USRQUOTA); break; @@ -397,10 +522,66 @@ static int parse_options(struct super_block *sb, char *options) case Opt_prjquota: set_opt(sbi, PRJQUOTA); break; + case Opt_usrjquota: + ret = f2fs_set_qf_name(sb, USRQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_grpjquota: + ret = f2fs_set_qf_name(sb, GRPQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_prjjquota: + ret = f2fs_set_qf_name(sb, PRJQUOTA, &args[0]); + if (ret) + return ret; + break; + case Opt_offusrjquota: + ret = f2fs_clear_qf_name(sb, USRQUOTA); + if (ret) + return ret; + break; + case Opt_offgrpjquota: + ret = f2fs_clear_qf_name(sb, GRPQUOTA); + if (ret) + return ret; + break; + case Opt_offprjjquota: + ret = f2fs_clear_qf_name(sb, PRJQUOTA); + if (ret) + return ret; + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; + case Opt_jqfmt_vfsv1: + sbi->s_jquota_fmt = QFMT_VFS_V1; + break; + case Opt_noquota: + clear_opt(sbi, QUOTA); + clear_opt(sbi, USRQUOTA); + clear_opt(sbi, GRPQUOTA); + clear_opt(sbi, PRJQUOTA); + break; #else + case Opt_quota: case Opt_usrquota: case Opt_grpquota: case Opt_prjquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_prjjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case Opt_offprjjquota: + case Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + case Opt_jqfmt_vfsv1: + case Opt_noquota: f2fs_msg(sb, KERN_INFO, "quota operations not supported"); break; @@ -412,6 +593,10 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } +#ifdef CONFIG_QUOTA + if (f2fs_check_quota_options(sbi)) + return -EINVAL; +#endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { f2fs_msg(sb, KERN_ERR, @@ -591,7 +776,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) kfree(sbi->devs); } -static void f2fs_quota_off_umount(struct super_block *sb); static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -658,6 +842,10 @@ static void f2fs_put_super(struct super_block *sb) destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) kfree(sbi->write_io[i]); @@ -671,6 +859,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return -EAGAIN; + if (sync) { struct cp_control cpc; @@ -791,6 +982,40 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline void f2fs_show_quota_options(struct seq_file *seq, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (sbi->s_jquota_fmt) { + char *fmtname = ""; + + switch (sbi->s_jquota_fmt) { + case QFMT_VFS_OLD: + fmtname = "vfsold"; + break; + case QFMT_VFS_V0: + fmtname = "vfsv0"; + break; + case QFMT_VFS_V1: + fmtname = "vfsv1"; + break; + } + seq_printf(seq, ",jqfmt=%s", fmtname); + } + + if (sbi->s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + + if (sbi->s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); +#endif +} + static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); @@ -864,6 +1089,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) sbi->fault_info.inject_rate); #endif #ifdef CONFIG_QUOTA + if (test_opt(sbi, QUOTA)) + seq_puts(seq, ",quota"); if (test_opt(sbi, USRQUOTA)) seq_puts(seq, ",usrquota"); if (test_opt(sbi, GRPQUOTA)) @@ -871,6 +1098,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, PRJQUOTA)) seq_puts(seq, ",prjquota"); #endif + f2fs_show_quota_options(seq, sbi->sb); return 0; } @@ -919,6 +1147,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info ffi = sbi->fault_info; #endif +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; + int i, j; +#endif /* * Save the old mount options in case we @@ -928,6 +1161,23 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) old_sb_flags = sb->s_flags; active_logs = sbi->active_logs; +#ifdef CONFIG_QUOTA + s_jquota_fmt = sbi->s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) { + s_qf_names[i] = kstrdup(sbi->s_qf_names[i], + GFP_KERNEL); + if (!s_qf_names[i]) { + for (j = 0; j < i; j++) + kfree(s_qf_names[j]); + return -ENOMEM; + } + } else { + s_qf_names[i] = NULL; + } + } +#endif + /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); @@ -1009,6 +1259,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_gc; } skip: +#ifdef CONFIG_QUOTA + /* Release old quota file names */ + for (i = 0; i < MAXQUOTAS; i++) + kfree(s_qf_names[i]); +#endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); @@ -1023,6 +1278,13 @@ restore_gc: stop_gc_thread(sbi); } restore_opts: +#ifdef CONFIG_QUOTA + sbi->s_jquota_fmt = s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + kfree(sbi->s_qf_names[i]); + sbi->s_qf_names[i] = s_qf_names[i]; + } +#endif sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; @@ -1139,6 +1401,27 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) return &F2FS_I(inode)->i_reserved_quota; } +static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) +{ + return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], + sbi->s_jquota_fmt, type); +} + +void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +{ + int i, ret; + + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) { + ret = f2fs_quota_on_mount(sbi, i); + if (ret < 0) + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); + } + } +} + static int f2fs_quota_sync(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); @@ -1220,7 +1503,7 @@ out_put: return err; } -static void f2fs_quota_off_umount(struct super_block *sb) +void f2fs_quota_off_umount(struct super_block *sb) { int type; @@ -1258,7 +1541,7 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .get_nextdqblk = dquot_get_next_dqblk, }; #else -static inline void f2fs_quota_off_umount(struct super_block *sb) +void f2fs_quota_off_umount(struct super_block *sb) { } #endif @@ -2178,11 +2461,6 @@ try_onemore: if (err) goto free_nm; - /* if there are nt orphan nodes free them */ - err = recover_orphan_inodes(sbi); - if (err) - goto free_node_inode; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { @@ -2206,6 +2484,11 @@ try_onemore: if (err) goto free_root_inode; + /* if there are nt orphan nodes free them */ + err = recover_orphan_inodes(sbi); + if (err) + goto free_sysfs; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -2215,7 +2498,7 @@ try_onemore: if (bdev_read_only(sb->s_bdev) && !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - goto free_sysfs; + goto free_meta; } if (need_fsck) @@ -2229,7 +2512,7 @@ try_onemore: need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); - goto free_sysfs; + goto free_meta; } } else { err = recover_fsync_data(sbi, true); @@ -2253,7 +2536,7 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto free_sysfs; + goto free_meta; } kfree(options); @@ -2271,8 +2554,16 @@ skip_recovery: f2fs_update_time(sbi, REQ_TIME); return 0; -free_sysfs: +free_meta: f2fs_sync_inode_meta(sbi); + /* + * Some dirty meta pages can be produced by recover_orphan_inodes() + * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() + * followed by write_checkpoint() through f2fs_write_node_pages(), which + * falls into an infinite loop in sync_meta_pages(). + */ + truncate_inode_pages_final(META_MAPPING(sbi)); +free_sysfs: f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); @@ -2282,13 +2573,6 @@ free_node_inode: mutex_lock(&sbi->umount_mutex); release_ino_entry(sbi, true); f2fs_leave_shrinker(sbi); - /* - * Some dirty meta pages can be produced by recover_orphan_inodes() - * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() - * followed by write_checkpoint() through f2fs_write_node_pages(), which - * falls into an infinite loop in sync_meta_pages(). - */ - truncate_inode_pages_final(META_MAPPING(sbi)); iput(sbi->node_inode); mutex_unlock(&sbi->umount_mutex); f2fs_destroy_stats(sbi); @@ -2308,6 +2592,10 @@ free_options: for (i = 0; i < NR_PAGE_TYPE; i++) kfree(sbi->write_io[i]); destroy_percpu_info(sbi); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif kfree(options); free_sb_buf: kfree(raw_super); -- cgit From f2220c7f155c99392b307be3012aae976a503afe Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Wed, 9 Aug 2017 17:27:30 +0800 Subject: f2fs: merge equivalent flags F2FS_GET_BLOCK_[READ|DIO] Currently, the two flags F2FS_GET_BLOCK_[READ|DIO] are totally equivalent and can be used interchangably in all scenarios they are involved in. Neither of the flags is referenced in f2fs_map_blocks(), making them both the default case. To remove the ambiguity, this patch merges both flags into F2FS_GET_BLOCK_DEFAULT, and introduces an enum for all distinct flags. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 13 +++++++------ fs/f2fs/file.c | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c43262dc36de..67da4f6eeaa0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1044,7 +1044,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL); + F2FS_GET_BLOCK_DEFAULT, NULL); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1244,7 +1244,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_len = last_block - block_in_file; if (f2fs_map_blocks(inode, &map, 0, - F2FS_GET_BLOCK_READ)) + F2FS_GET_BLOCK_DEFAULT)) goto set_error_page; } got_it: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dddbe9269ceb..e252e5bf9791 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -510,12 +510,13 @@ struct f2fs_map_blocks { }; /* for flag in get_data_block */ -#define F2FS_GET_BLOCK_READ 0 -#define F2FS_GET_BLOCK_DIO 1 -#define F2FS_GET_BLOCK_FIEMAP 2 -#define F2FS_GET_BLOCK_BMAP 3 -#define F2FS_GET_BLOCK_PRE_DIO 4 -#define F2FS_GET_BLOCK_PRE_AIO 5 +enum { + F2FS_GET_BLOCK_DEFAULT, + F2FS_GET_BLOCK_FIEMAP, + F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_PRE_DIO, + F2FS_GET_BLOCK_PRE_AIO, +}; /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e2b33b87701f..3eebd49c3348 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2074,7 +2074,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, */ while (map.m_lblk < pg_end) { map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto out; @@ -2116,7 +2116,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, do_map: map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto clear_out; -- cgit From afd2b4da40b3b567ef8d8e6881479345a2312a03 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 10 Aug 2017 17:35:04 -0700 Subject: f2fs: let fill_super handle roll-forward errors If we set CP_ERROR_FLAG in roll-forward error, f2fs is no longer to proceed any IOs due to f2fs_cp_error(). But, for example, if some stale data is involved on roll-forward process, we're able to get -ENOENT, getting fs stuck. If we get any error, let fill_super set SBI_NEED_FSCK and try to recover back to stable point. Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a3d02613934a..f707d810c87d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -649,8 +649,6 @@ skip: } clear_sbi_flag(sbi, SBI_POR_DOING); - if (err) - set_ckpt_flags(sbi, CP_ERROR_FLAG); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ -- cgit From 7f2b4e8ea5b49846f7d20f1694dbca81f88b8d50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2017 19:09:08 +0800 Subject: f2fs: retry to revoke atomic commit in -ENOMEM case During atomic committing, if we encounter -ENOMEM in revoke path, it's better to give a chance to retry revoking. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 95267630c8c4..05144b3a7f62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -213,9 +213,15 @@ static int __revoke_inmem_pages(struct inode *inode, struct node_info ni; trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); - +retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) { + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } err = -EAGAIN; goto next; } -- cgit From c56f16dab0dfc8a37bc6133f2c2a02ffb873648b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Aug 2017 18:00:15 +0800 Subject: f2fs: add tracepoint for f2fs_gc This patch adds tracepoint for f2fs_gc. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e60480f71bb5..57bea2182f30 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -919,7 +919,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int sec_freed = 0; + int seg_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; @@ -965,6 +965,10 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, gc_type); stat_inc_seg_count(sbi, type, gc_type); + + if (gc_type == FG_GC && + get_valid_blocks(sbi, segno, false) == 0) + seg_freed++; next: f2fs_put_page(sum_page, 0); } @@ -975,21 +979,17 @@ next: blk_finish_plug(&plug); - if (gc_type == FG_GC && - get_valid_blocks(sbi, start_segno, true) == 0) - sec_freed = 1; - stat_inc_call_count(sbi->stat_info); - return sec_freed; + return seg_freed; } int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno) { int gc_type = sync ? FG_GC : BG_GC; - int sec_freed = 0; - int ret; + int sec_freed = 0, seg_freed = 0, total_freed = 0; + int ret = 0; struct cp_control cpc; unsigned int init_segno = segno; struct gc_inode_list gc_list = { @@ -997,6 +997,15 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, .iroot = RADIX_TREE_INIT(GFP_NOFS), }; + trace_f2fs_gc_begin(sbi->sb, sync, background, + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_IMETA), + free_sections(sbi), + free_segments(sbi), + reserved_segments(sbi), + prefree_segments(sbi)); + cpc.reason = __get_cp_reason(sbi); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { @@ -1023,17 +1032,20 @@ gc_more: gc_type = FG_GC; } - ret = -EINVAL; /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ - if (gc_type == BG_GC && !background) + if (gc_type == BG_GC && !background) { + ret = -EINVAL; goto stop; - if (!__get_victim(sbi, &segno, gc_type)) + } + if (!__get_victim(sbi, &segno, gc_type)) { + ret = -ENODATA; goto stop; - ret = 0; + } - if (do_garbage_collect(sbi, segno, &gc_list, gc_type) && - gc_type == FG_GC) + seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); + if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec) sec_freed++; + total_freed += seg_freed; if (gc_type == FG_GC) sbi->cur_victim_sec = NULL_SEGNO; @@ -1050,6 +1062,16 @@ gc_more: stop: SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; + + trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed, + get_pages(sbi, F2FS_DIRTY_NODES), + get_pages(sbi, F2FS_DIRTY_DENTS), + get_pages(sbi, F2FS_DIRTY_IMETA), + free_sections(sbi), + free_segments(sbi), + reserved_segments(sbi), + prefree_segments(sbi)); + mutex_unlock(&sbi->gc_mutex); put_gc_inode(&gc_list); -- cgit From 125c9fb1ccb53eb2ea9380df40f3c743f3fb2fed Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 12 Aug 2017 21:33:23 -0700 Subject: f2fs: check hot_data for roll-forward recovery We need to check HOT_DATA to truncate any previous data block when doing roll-forward recovery. Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f707d810c87d..9626758bc762 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -317,7 +317,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, return 0; /* Get the previous summary */ - for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); if (curseg->segno == segno) { sum = curseg->sum_blk->entries[blkoff]; -- cgit From f24b150a63b208f4437efc7dc7a42954363641ca Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 14 Aug 2017 16:52:43 +0800 Subject: f2fs: remove unused function overprovision_sections Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7f700e54b77d..b9b4f85ffeb6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -492,11 +492,6 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi) return SM_I(sbi)->ovp_segments; } -static inline int overprovision_sections(struct f2fs_sb_info *sbi) -{ - return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi)); -} - static inline int reserved_sections(struct f2fs_sb_info *sbi) { return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); -- cgit From 969d1b180d987c2be02de890d0fff0f66a0e80de Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 23:09:56 +0800 Subject: f2fs: introduce discard_granularity sysfs entry Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables f2fs to issue 4K size discard in real-time discard mode. However, issuing smaller discard may cost more lifetime but releasing less free space in flash device. Since f2fs has ability of separating hot/cold data and garbage collection, we can expect that small-sized invalid region would expand soon with OPU, deletion or garbage collection on valid datas, so it's better to delay or skip issuing smaller size discards, it could help to reduce overmuch consumption of IO bandwidth and lifetime of flash storage. This patch makes f2fs selectng 64K size as its default minimal granularity, and issue discard with the size which is not smaller than minimal granularity. Also it exposes discard granularity as sysfs entry for configuration in different scenario. Jaegeuk Kim: We must issue all the accumulated discard commands when fstrim is called. So, I've added pend_list_tag[] to indicate whether we should issue the commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them. P_TRIM is set once at a time, given fstrim trigger. In addition, issue_discard_thread is calling too much due to the number of discard commands remaining in the pending list. I added a timer to control it likewise gc_thread. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++ fs/f2fs/segment.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++--------- fs/f2fs/sysfs.c | 23 ++++++++++++++ 3 files changed, 112 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e252e5bf9791..4b993961d81d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,6 +148,8 @@ enum { (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DISCARD_ISSUE_RATE 8 +#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -196,11 +198,18 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* default discard granularity of inner discard thread, unit: block count */ +#define DEFAULT_DISCARD_GRANULARITY 16 + /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : (blk_num - 1)) +#define P_ACTIVE 0x01 +#define P_TRIM 0x02 +#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) + enum { D_PREP, D_SUBMIT, @@ -236,11 +245,14 @@ struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ + unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ + unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 05144b3a7f62..1387925a0d83 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1016,32 +1016,65 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, iter = 0; + int iter = 0, issued = 0; + int i; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { pend_list = &dcc->pend_list[i]; list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (!issue_cond || is_idle(sbi)) + /* Hurry up to finish fstrim */ + if (dcc->pend_list_tag[i] & P_TRIM) { + __submit_discard_cmd(sbi, dc); + issued++; + continue; + } + + if (!issue_cond || is_idle(sbi)) { + issued++; __submit_discard_cmd(sbi, dc); + } if (issue_cond && iter++ > DISCARD_ISSUE_RATE) goto out; } + if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) + dcc->pend_list_tag[i] &= (~P_TRIM); } out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + + return issued; +} + +static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); } static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, @@ -1126,34 +1159,56 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); + __drop_discard_cmd(sbi); __wait_discard_cmd(sbi, false); } +static void mark_discard_range_all(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) + dcc->pend_list_tag[i] |= P_TRIM; + mutex_unlock(&dcc->cmd_lock); +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + int issued; set_freezable(); do { - wait_event_interruptible(*q, kthread_should_stop() || - freezing(current) || - atomic_read(&dcc->discard_cmd_cnt)); + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); if (try_to_freeze()) continue; if (kthread_should_stop()) return 0; + if (dcc->discard_wake) + dcc->discard_wake = 0; + sb_start_intwrite(sbi->sb); - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, true); + if (issued) { + __wait_discard_cmd(sbi, true); + wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + } else { + wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + } sb_end_intwrite(sbi->sb); - congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; } @@ -1344,7 +1399,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *head = &dcc->entry_list; struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; @@ -1426,11 +1482,12 @@ skip: goto find_next; list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= total_len; + dcc->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) @@ -1448,9 +1505,13 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; + dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) + for (i = 0; i < MAX_PLIST_NUM; i++) { INIT_LIST_HEAD(&dcc->pend_list[i]); + if (i >= dcc->discard_granularity - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + } INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); @@ -2127,6 +2188,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } + /* It's time to issue all the filed discards */ + mark_discard_range_all(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c40e5d24df9f..4bcaa9059026 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -152,6 +152,27 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, spin_unlock(&sbi->stat_lock); return count; } + + if (!strcmp(a->attr.name, "discard_granularity")) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + if (t == 0 || t > MAX_PLIST_NUM) + return -EINVAL; + if (t == *ui) + return count; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) { + if (i >= t - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + else + dcc->pend_list_tag[i] &= (~P_ACTIVE); + } + mutex_unlock(&dcc->cmd_lock); + return count; + } + *ui = t; if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) @@ -248,6 +269,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); @@ -290,6 +312,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), -- cgit From 5f656541ff6e4f58b4ab5b4ae59badb97a9ff749 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Aug 2017 21:27:19 -0700 Subject: f2fs: issue discard commands if gc_urgent is set It's time to issue all the discard commands, if user sets the idle time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- fs/f2fs/sysfs.c | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1387925a0d83..e3922f902c8c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -21,6 +21,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include "gc.h" #include "trace.h" #include @@ -1194,8 +1195,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) + if (dcc->discard_wake) { dcc->discard_wake = 0; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + mark_discard_range_all(sbi); + } sb_start_intwrite(sbi->sb); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4bcaa9059026..b9ad9041559f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -178,8 +178,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); + + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); } return count; -- cgit From 6f890df0a7efe3181aceb5d8bcd4af7deb2abce5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Aug 2017 22:53:45 +0800 Subject: f2fs: fix out-of-order execution in f2fs_issue_flush In f2fs_issue_flush, due to out-of-order execution of CPU, wake_up can be called before we insert issue_list, result in long latency of wait_for_completion. Fix this by adding smp_mb() to force the order of related codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e3922f902c8c..be1c49b9402b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -549,7 +549,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); - if (!fcc->dispatch_list) + /* update issue_list before we wake up issue_flush thread */ + smp_mb(); + + if (waitqueue_active(&fcc->flush_wait_queue)) wake_up(&fcc->flush_wait_queue); if (fcc->f2fs_issue_flush) { -- cgit From 84a23fbe96b4e307eb749046a74515329119b08d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Aug 2017 23:37:36 +0800 Subject: f2fs: clear FI_HOT_DATA correctly This patch fixes to clear FI_HOT_DATA correctly in below path: - error handling in f2fs_ioc_start_atomic_write - after commit atomic write in f2fs_ioc_commit_atomic_write - after drop atomic write in drop_inmem_pages Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ fs/f2fs/segment.c | 1 + 2 files changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3eebd49c3348..8f0d32a57b52 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1640,6 +1640,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); goto out; } @@ -1678,6 +1679,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); if (!ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } } else { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index be1c49b9402b..8306beace7cb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -255,6 +255,7 @@ void drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } -- cgit From 0adf6a1b796777f5d19a9b7442172016aeb8020a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Aug 2017 16:20:33 +0800 Subject: f2fs: trigger normal fsync for non-atomic_write file If file was not opened with atomic write mode, but user uses atomic write ioctl to fsync datas, in the flow, we should not fsync that file with atomic write mode. Fixes: 608514deba38 ("f2fs: set fsync mark only for the last dnode") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8f0d32a57b52..d74f1a9104a3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1683,7 +1683,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) stat_dec_atomic_write(inode); } } else { - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, false); } err_out: inode_unlock(inode); -- cgit From adb6dc197187e2a5f5a7bed01e722f46a58676af Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 21 Aug 2017 13:51:32 -0700 Subject: f2fs: return error when accessing insane flie offset If file offset is insane, we have to return error instead of kernel panic. Reported-by: Eric Zhang Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9168c304fd58..2654c9166fba 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -555,7 +555,7 @@ static int get_node_path(struct inode *inode, long block, level = 3; goto got; } else { - BUG(); + return -E2BIG; } got: return level; @@ -579,6 +579,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) int err = 0; level = get_node_path(dn->inode, index, offset, noffset); + if (level < 0) + return level; nids[0] = dn->inode->i_ino; npage[0] = dn->inode_page; @@ -878,6 +880,8 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); + if (level < 0) + return level; page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { -- cgit From 01983c715ad0e78842a885f361ad927a3a985994 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Aug 2017 21:15:43 -0700 Subject: f2fs: wake up discard_thread iff there is a candidate This patch fixes to avoid needless wake ups. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +-- fs/f2fs/segment.h | 25 +++++++++++++++++++++++++ fs/f2fs/sysfs.c | 6 +----- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8306beace7cb..8375257b6b26 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1494,8 +1494,7 @@ skip: kmem_cache_free(discard_entry_slab, entry); } - dcc->discard_wake = 1; - wake_up_interruptible_all(&dcc->discard_wait_queue); + wake_up_discard_thread(sbi, false); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b9b4f85ffeb6..613b2fa7b1c1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -798,3 +798,28 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, wbc->nr_to_write = desired; return desired - nr_to_write; } + +static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + bool wakeup = false; + int i; + + if (force) + goto wake_up; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + if (!list_empty(&dcc->pend_list[i])) { + wakeup = true; + break; + } + } + mutex_unlock(&dcc->cmd_lock); + if (!wakeup) + return; +wake_up: + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); +} diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b9ad9041559f..962735dc9c63 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -178,13 +178,9 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); - - dcc->discard_wake = 1; - wake_up_interruptible_all(&dcc->discard_wait_queue); + wake_up_discard_thread(sbi, true); } return count; -- cgit From 73ac2f4e8256b9605c84364011322f015b31f499 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Aug 2017 18:23:24 +0800 Subject: f2fs: fix to avoid race in between aio and gc We won't wait DIO synchronously when doing AIO, so there will be potential IO reorder in between AIO and GC, which will cause data corruption. This patch adds inode_dio_wait to serialize aio and data GC to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 57bea2182f30..cd147e7c71e8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -875,6 +875,9 @@ next_step: continue; } locked = true; + + /* wait for all inflight aio data */ + inode_dio_wait(inode); } start_bidx = start_bidx_of_node(nofs, inode) -- cgit From 774e1b78a0f90a7e81f7a23d9ebfa8b8233c1ffa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Aug 2017 18:23:25 +0800 Subject: f2fs: trigger fdatasync for non-atomic_write file Sqlite only cares about synchronization of file data instead of other data unrelated attribute of inode, so in commit flow, call fdatasync is enough. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d74f1a9104a3..63e394907808 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1683,7 +1683,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) stat_dec_atomic_write(inode); } } else { - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, false); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: inode_unlock(inode); -- cgit From a298d57f960255d8618b86f403dee305d3c8a29c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Aug 2017 11:10:58 -0700 Subject: f2fs: don't need to update inode checksum for recovery This patch fixes "f2fs: support inode checksum". The recovered inode page will be rewritten with valid checksum. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2654c9166fba..388a00262a5f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2287,8 +2287,6 @@ retry: F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; - - f2fs_inode_chksum_set(sbi, ipage); } new_ni = old_ni; -- cgit From ee605234996627c4fe874ea580e36211fb2bf6d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Aug 2017 16:54:51 -0700 Subject: f2fs: don't check inode's checksum if it was dirtied or writebacked If another thread already made the page dirtied or writebacked, we must avoid to verify checksum. If we got an error, we need to remove its uptodate as well. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 ++- fs/f2fs/node.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b4c401d456e7..c33b05aec1a1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -153,7 +153,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_inode *ri; __u32 provided, calculated; - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, page) || + PageDirty(page) || PageWriteback(page)) return true; ri = &F2FS_NODE(page)->i; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 388a00262a5f..fca87835a1da 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1187,9 +1187,9 @@ page_hit: nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); - ClearPageUptodate(page); err = -EINVAL; out_err: + ClearPageUptodate(page); f2fs_put_page(page, 1); return ERR_PTR(err); } -- cgit From 11f5020d2f43a9a7b9b5cf22873e5a9a06a884f7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Aug 2017 18:04:47 +0800 Subject: f2fs: update i_flags correctly f2fs enables hash-indexed directory by default, so we need to tag FS_INDEX_FL in inode::i_flags during directory creataion, in order to show correct status of inode in lsattr: Before: ------------------- /mnt/f2fs/dir/ After: -----------I------- /mnt/f2fs/dir/ Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e241c1b2c636..a4dab98c4b7b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -100,6 +100,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_flags |= FS_INDEX_FL; + if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); -- cgit From 025d63a486aad611b20fa39184fc86e4b76d260e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Aug 2017 18:04:48 +0800 Subject: f2fs: remove unneeded parameter of change_curseg allocate_segment_by_default is the only caller of change_curseg passing @reuse with 'false', but commit 763bfe1bc575 ("f2fs: remove reusing any prefree segments") removes the calling, after that, @reuse in change_curseg always be true, so, let's clean up the unneeded parameter. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8375257b6b26..d6c3f456ea51 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2012,7 +2012,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2033,12 +2033,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - if (reuse) { - sum_page = get_sum_page(sbi, new_segno); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); - memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); - } + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) @@ -2102,7 +2100,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2455,7 +2453,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -2474,7 +2472,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; } -- cgit From edd748e6c8e824a8281f8a8450f12c4a95ec61ee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:05 +0800 Subject: f2fs: avoid race in between atomic_read & atomic_inc Previously, we will miss merging flush command during fsync due to below race condition: Thread A Thread B Thread C - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - submit_flush_wait - submit_flush_wait - submit_flush_wait It needs to use atomic_inc_return instead to avoid such race. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d6c3f456ea51..1215ca1bd4e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -536,8 +536,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return ret; } - if (!atomic_read(&fcc->issing_flush)) { - atomic_inc(&fcc->issing_flush); + if (atomic_inc_return(&fcc->issing_flush) == 1) { ret = submit_flush_wait(sbi); atomic_dec(&fcc->issing_flush); @@ -547,7 +546,6 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) init_completion(&cmd.wait); - atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); /* update issue_list before we wake up issue_flush thread */ -- cgit From d3238691ed5f7be29f7b8b7cf7c68bbb2924361d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:06 +0800 Subject: f2fs: fix to wake up all sleeping flusher In scenario of remount_ro vs flush, after flush_thread exits in ->remount_fs, flusher will only clean up golbal issue_list, but without waking up flushers waiting on that list, result in hang related user threads. In order to fix this issue, this patch enables the flusher to take charge of issue_flush thread: executes merged flush command, and wake up all sleeping flushers. Fixes: 5eba8c5d1fb3 ("f2fs: fix to access nullified flush_cmd_control pointer") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1215ca1bd4e2..265c3bc44f2d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -558,8 +558,27 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) wait_for_completion(&cmd.wait); atomic_dec(&fcc->issing_flush); } else { - llist_del_all(&fcc->issue_list); - atomic_set(&fcc->issing_flush, 0); + struct llist_node *list; + + list = llist_del_all(&fcc->issue_list); + if (!list) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->issing_flush); + } else { + struct flush_cmd *tmp, *next; + + ret = submit_flush_wait(sbi); + + llist_for_each_entry_safe(tmp, next, list, llnode) { + if (tmp == &cmd) { + cmd.ret = ret; + atomic_dec(&fcc->issing_flush); + continue; + } + tmp->ret = ret; + complete(&tmp->wait); + } + } } return cmd.ret; -- cgit From f62fc9f97602d5f52fd574db80d95d71869bfe21 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 31 Aug 2017 15:06:24 +0530 Subject: f2fs: constify super_operations super_operations are not supposed to change at runtime. "struct super_block" working with super_operations provided by work with const super_operations. So mark the non-const structs as const Signed-off-by: Arvind Yadav Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 52083d662197..7730316c19c6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1546,7 +1546,7 @@ void f2fs_quota_off_umount(struct super_block *sb) } #endif -static struct super_operations f2fs_sops = { +static const struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, -- cgit From 2afce76a1151fe2f1104962c327d8f85047045e6 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 4 Sep 2017 11:10:18 +0800 Subject: Revert "f2fs: add a new function get_ssr_cost" This reverts commit b7b7c4cf1c9ef0272a65f1480457cbfdadcda19d. se->ckpt_valid_blocks will never be smaller than se->valid_blocks, so just remove get_ssr_cost. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cd147e7c71e8..b226760afba8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -277,20 +277,11 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, valid_blocks * 2 : valid_blocks; } -static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - struct seg_entry *se = get_seg_entry(sbi, segno); - - return se->ckpt_valid_blocks > se->valid_blocks ? - se->ckpt_valid_blocks : se->valid_blocks; -} - static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { if (p->alloc_mode == SSR) - return get_ssr_cost(sbi, segno); + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) -- cgit From 1958593e4fa9fa9e3e2d03b27e72af314c2891be Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 Sep 2017 16:54:24 -0700 Subject: f2fs: introduce f2fs_encrypted_file for clean-up This patch replaces (f2fs_encrypted_inode() && S_ISREG()) with f2fs_encrypted_file(), which gives no functional change. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++----- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 5 ++--- fs/f2fs/inline.c | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 67da4f6eeaa0..e6c683e7a10e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -581,7 +581,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, .encrypted_page = NULL, }; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) return read_mapping_page(mapping, index, NULL); page = f2fs_grab_cache_page(mapping, index, for_write); @@ -786,7 +786,7 @@ alloc: static inline bool __force_buffered_io(struct inode *inode, int rw) { - return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) || + return (f2fs_encrypted_file(inode) || (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || F2FS_I_SB(inode)->s_ndevs); } @@ -1157,7 +1157,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, struct fscrypt_ctx *ctx = NULL; struct bio *bio; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (f2fs_encrypted_file(inode)) { ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) return ERR_CAST(ctx); @@ -1345,7 +1345,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) struct inode *inode = fio->page->mapping->host; gfp_t gfp_flags = GFP_NOFS; - if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + if (!f2fs_encrypted_file(inode)) return 0; /* wait for GCed encrypted page writeback */ @@ -1974,7 +1974,7 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); if (len == PAGE_SIZE || PageUptodate(page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4b993961d81d..417c28f01fd5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2939,6 +2939,11 @@ static inline bool f2fs_encrypted_inode(struct inode *inode) return file_is_encrypt(inode); } +static inline bool f2fs_encrypted_file(struct inode *inode) +{ + return f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode); +} + static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 63e394907808..f8cedaba7ce4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -106,7 +106,7 @@ mapped: f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); out_sem: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b226760afba8..6a12f33d0cdd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -831,8 +831,7 @@ next_step: continue; /* if encrypted inode, let's go phase 3 */ - if (f2fs_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { + if (f2fs_encrypted_file(inode)) { add_gc_inode(gc_list, inode); continue; } @@ -873,7 +872,7 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) move_encrypted_block(inode, start_bidx, segno, off); else move_data_page(inode, start_bidx, gc_type, segno, off); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e63ab0d1f614..c133a4fdecf6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -25,7 +25,7 @@ bool f2fs_may_inline_data(struct inode *inode) if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) + if (f2fs_encrypted_file(inode)) return false; return true; -- cgit From d4c759ee5faa51e0b0ee55d8a229ba5b80c4917e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 Sep 2017 17:04:35 -0700 Subject: f2fs: use generic terms used for encrypted block management This patch renames functions regarding to buffer management via META_MAPPING used for encrypted blocks especially. We can actually use them in generic way. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++--- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 13 +++++++++---- fs/f2fs/segment.c | 3 +-- 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e6c683e7a10e..ee6801fdbdec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1163,7 +1163,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, return ERR_CAST(ctx); /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + f2fs_wait_on_block_writeback(sbi, blkaddr); } bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); @@ -1349,7 +1349,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) return 0; /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, @@ -1975,7 +1975,7 @@ repeat: /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_file(inode)) - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + f2fs_wait_on_block_writeback(sbi, blkaddr); if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 417c28f01fd5..91fb749686f2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2550,8 +2550,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr); +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f8cedaba7ce4..224379a9848c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -107,7 +107,7 @@ mapped: /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_file(inode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6a12f33d0cdd..bfe6a8ccc3a0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -599,8 +599,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } -static void move_encrypted_block(struct inode *inode, block_t bidx, - unsigned int segno, int off) +/* + * Move data block via META_MAPPING while keeping locked data page. + * This can be used to move blocks, aka LBAs, directly on disk. + */ +static void move_data_block(struct inode *inode, block_t bidx, + unsigned int segno, int off) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -873,9 +877,10 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_encrypted_file(inode)) - move_encrypted_block(inode, start_bidx, segno, off); + move_data_block(inode, start_bidx, segno, off); else - move_data_page(inode, start_bidx, gc_type, segno, off); + move_data_page(inode, start_bidx, gc_type, + segno, off); if (locked) { up_write(&fi->dio_rwsem[WRITE]); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 265c3bc44f2d..9e708e525ba8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2528,8 +2528,7 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr) +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; -- cgit From 13ba41e346170e594b7ce582561b3efa5b85f18f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 6 Sep 2017 21:04:44 -0700 Subject: f2fs: make get_lock_data_page to handle encrypted inode This patch refactors get_lock_data_page() to handle encryption case directly. In order to do that, it introduces common f2fs_submit_page_read(). Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 109 +++++++++++++++++++++++++++------------------------------ 1 file changed, 51 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ee6801fdbdec..95f30f0000b6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -456,6 +456,53 @@ out_fail: return err; } +static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct fscrypt_ctx *ctx = NULL; + struct bio *bio; + + if (f2fs_encrypted_file(inode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_block_writeback(sbi, blkaddr); + } + + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + if (!bio) { + if (ctx) + fscrypt_release_ctx(ctx); + return ERR_PTR(-ENOMEM); + } + f2fs_target_device(sbi, blkaddr, bio); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + return bio; +} + +/* This can handle encryption stuffs */ +static int f2fs_submit_page_read(struct inode *inode, struct page *page, + block_t blkaddr) +{ + struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + bio_put(bio); + return -EFAULT; + } + __submit_bio(F2FS_I_SB(inode), bio, DATA); + return 0; +} + static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn = F2FS_NODE(dn->node_page); @@ -573,16 +620,6 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, struct page *page; struct extent_info ei = {0,0,0}; int err; - struct f2fs_io_info fio = { - .sbi = F2FS_I_SB(inode), - .type = DATA, - .op = REQ_OP_READ, - .op_flags = op_flags, - .encrypted_page = NULL, - }; - - if (f2fs_encrypted_file(inode)) - return read_mapping_page(mapping, index, NULL); page = f2fs_grab_cache_page(mapping, index, for_write); if (!page) @@ -623,9 +660,7 @@ got_it: return page; } - fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; - fio.page = page; - err = f2fs_submit_page_bio(&fio); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr); if (err) goto put_err; return page; @@ -1150,35 +1185,6 @@ out: return ret; } -static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct fscrypt_ctx *ctx = NULL; - struct bio *bio; - - if (f2fs_encrypted_file(inode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); - } - - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); - return ERR_PTR(-ENOMEM); - } - f2fs_target_device(sbi, blkaddr, bio); - bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; - - return bio; -} - /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1275,12 +1281,11 @@ submit_and_realloc: bio = NULL; } if (bio == NULL) { - bio = f2fs_grab_bio(inode, block_nr, nr_pages); + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); if (IS_ERR(bio)) { bio = NULL; goto set_error_page; } - bio_set_op_attrs(bio, REQ_OP_READ, 0); } if (bio_add_page(bio, page, blocksize, 0) < blocksize) @@ -1989,21 +1994,9 @@ repeat: zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { - struct bio *bio; - - bio = f2fs_grab_bio(inode, blkaddr, 1); - if (IS_ERR(bio)) { - err = PTR_ERR(bio); - goto fail; - } - bio->bi_opf = REQ_OP_READ; - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - err = -EFAULT; + err = f2fs_submit_page_read(inode, page, blkaddr); + if (err) goto fail; - } - - __submit_bio(sbi, bio, DATA); lock_page(page); if (unlikely(page->mapping != mapping)) { -- cgit From 27161f13e3c3241944846ac24942a85cceda0a2c Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 7 Sep 2017 10:40:54 +0800 Subject: f2fs: avoid race in between read xattr & write xattr Thread A: Thread B: -f2fs_getxattr -lookup_all_xattrs -xnid = F2FS_I(inode)->i_xattr_nid; -f2fs_setxattr -__f2fs_setxattr -write_all_xattrs -truncate_xattr_node ... ... -write_checkpoint ... ... -alloc_nid <- nid reuse -get_node_page -f2fs_bug_on <- nid != node_footer->nid It's need a rw_sem to avoid the race Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + fs/f2fs/xattr.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 91fb749686f2..c0803b1873b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -589,6 +589,7 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ struct rw_semaphore i_mmap_sem; + struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7730316c19c6..89f61eb3d167 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -630,6 +630,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->dio_rwsem[READ]); init_rwsem(&fi->dio_rwsem[WRITE]); init_rwsem(&fi->i_mmap_sem); + init_rwsem(&fi->i_xattr_sem); #ifdef CONFIG_QUOTA memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ef3a3721ea6b..7c65540148f8 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -473,8 +473,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; + down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr); + up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -503,7 +505,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error = 0; size_t rest = buffer_size; + down_read(&F2FS_I(inode)->i_xattr_sem); error = read_all_xattrs(inode, NULL, &base_addr); + up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -686,7 +690,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); + down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); + up_write(&F2FS_I(inode)->i_xattr_sem); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); -- cgit From 1eb1ef4a8e9f6a4d9c2c7a645aba21d2f8719728 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 12:03:23 -0700 Subject: f2fs: better to wait for fstrim completion In android, we'd better wait for fstrim completion instead of issuing the discard commands asynchronous. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e708e525ba8..273cc645e502 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1061,6 +1062,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) if (dcc->pend_list_tag[i] & P_TRIM) { __submit_discard_cmd(sbi, dc); issued++; + + if (fatal_signal_pending(current)) + break; continue; } @@ -1177,7 +1181,7 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super */ +/* This comes from f2fs_put_super and f2fs_trim_fs */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); @@ -2212,6 +2216,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); + f2fs_wait_discard_bios(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit From b3a97a2a9a7b2d50bcf13d32857cd6f5695c6b65 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 11:11:04 -0700 Subject: f2fs: speed up gc_urgent mode with SSR This patch activates SSR in gc_urgent mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 15 +++++++++++++++ fs/f2fs/segment.h | 13 ------------- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c0803b1873b3..9a7c90386947 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2511,6 +2511,7 @@ void destroy_node_manager_caches(void); /* * segment.c */ +bool need_SSR(struct f2fs_sb_info *sbi); void register_inmem_page(struct inode *inode, struct page *page); void drop_inmem_pages(struct inode *inode); void drop_inmem_page(struct inode *inode, struct page *page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 273cc645e502..7fd742f747ce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -169,6 +169,21 @@ found: return result - size + __reverse_ffz(tmp); } +bool need_SSR(struct f2fs_sb_info *sbi) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + + if (test_opt(sbi, LFS)) + return false; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + return true; + + return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + + 2 * reserved_sections(sbi)); +} + void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 613b2fa7b1c1..e0a6cc23ace3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -497,19 +497,6 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } -static inline bool need_SSR(struct f2fs_sb_info *sbi) -{ - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - - if (test_opt(sbi, LFS)) - return false; - - return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); -} - static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { -- cgit From 0abd8e70d24b665dd00972d4a259e05528cbf4c6 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 11 Sep 2017 16:30:28 +0900 Subject: f2fs: clear radix tree dirty tag of pages whose dirty flag is cleared On a senario like writing out the first dirty page of the inode as the inline data, we only cleared dirty flags of the pages, but didn't clear the dirty tags of those pages in the radix tree. If we don't clear the dirty tags of the pages in the radix tree, the inodes which contain the pages will be marked with I_DIRTY_PAGES again and again, and writepages() for the inodes will be invoked in every writeback period. As a result, nothing will be done in every writepages() for the inodes and it will just consume CPU time meaninglessly. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 7 +++++++ fs/f2fs/inline.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 37f9c7f55605..c0c933ad43c8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -705,6 +705,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + struct address_space *mapping = page_mapping(page); + unsigned long flags; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -735,6 +737,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (bit_pos == NR_DENTRY_IN_BLOCK && !truncate_hole(dir, page->index, page->index + 1)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c133a4fdecf6..8322e4e7bb3f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -202,6 +202,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; + struct address_space *mapping = page_mapping(page); + unsigned long flags; int err; set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -223,6 +225,11 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); -- cgit From ca7d802a7d8ee4c47dce9be86ef4b27e587086bb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Sep 2017 14:04:05 +0800 Subject: f2fs: detect dirty inode in evict_inode Add a bugon in f2fs_evict_inode to detect inconsistent status between inode cache and related node page cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c33b05aec1a1..50c88e37ed66 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -519,6 +519,9 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) + f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + /* ino == 0, if f2fs_new_inode() was failed t*/ if (inode->i_ino) invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, -- cgit From 80647e5f4c728ecea7d9190c6e7163755ff6835c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Sep 2017 14:25:35 +0800 Subject: f2fs: fix to show correct discard_granularity in sysfs Fix below incorrect display when reading discard_granularity sysfs node. $ cat /sys/fs/f2fs//discard_granularity $ 16 $ echo 32 > /sys/fs/f2fs//discard_granularity $ cat /sys/fs/f2fs//discard_granularity $ 16 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 962735dc9c63..e2c258f717cd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -170,6 +170,8 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, dcc->pend_list_tag[i] &= (~P_ACTIVE); } mutex_unlock(&dcc->cmd_lock); + + *ui = t; return count; } -- cgit From e6c6de18f010d9a7d592f4044d2c30213cb3a7bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Sep 2017 21:35:12 +0800 Subject: f2fs: hurry up to issue discard after io interruption Once we encounter I/O interruption during issuing discards, we will delay long time before next round, but if system status is I/O idle during the time, it may loses opportunity to issue discards. So this patch changes to hurry up to issue discard after io interruption. Besides, this patch also fixes to issue discards accurately with assigned rate. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7fd742f747ce..dedf0209d820 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1062,6 +1062,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) struct blk_plug plug; int iter = 0, issued = 0; int i; + bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, @@ -1083,11 +1084,20 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) continue; } - if (!issue_cond || is_idle(sbi)) { + if (!issue_cond) { + __submit_discard_cmd(sbi, dc); issued++; + continue; + } + + if (is_idle(sbi)) { __submit_discard_cmd(sbi, dc); + issued++; + } else { + io_interrupted = true; } - if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + + if (++iter >= DISCARD_ISSUE_RATE) goto out; } if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) @@ -1097,6 +1107,9 @@ out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + if (!issued && io_interrupted) + issued = -1; + return issued; } -- cgit