Diffstat (limited to 'fs/f2fs/file.c')
-rw-r--r--	fs/f2fs/file.c	973
1 file changed, 658 insertions(+), 315 deletions(-)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1761ad125f97..6bd3de64f2a8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -8,7 +8,6 @@
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/stat.h>
-#include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/falloc.h>
@@ -54,11 +53,11 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
 
 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
-	bool need_alloc = true;
+	bool need_alloc = !f2fs_is_pinned_file(inode);
 	int err = 0;
 	vm_fault_t ret;
 
@@ -86,7 +85,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	if (f2fs_compressed_file(inode)) {
-		int ret = f2fs_is_compressed_cluster(inode, page->index);
+		int ret = f2fs_is_compressed_cluster(inode, folio->index);
 
 		if (ret < 0) {
 			err = ret;
@@ -106,34 +105,33 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	file_update_time(vmf->vma->vm_file);
 	filemap_invalidate_lock_shared(inode->i_mapping);
 
-	lock_page(page);
-	if (unlikely(page->mapping != inode->i_mapping ||
-			page_offset(page) > i_size_read(inode) ||
-			!PageUptodate(page))) {
-		unlock_page(page);
+	folio_lock(folio);
+	if (unlikely(folio->mapping != inode->i_mapping ||
+			folio_pos(folio) > i_size_read(inode) ||
+			!folio_test_uptodate(folio))) {
+		folio_unlock(folio);
 		err = -EFAULT;
 		goto out_sem;
 	}
 
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	if (need_alloc) {
 		/* block allocation */
-		set_new_dnode(&dn, inode, NULL, NULL, 0);
-		err = f2fs_get_block_locked(&dn, page->index);
-	}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	if (!need_alloc) {
-		set_new_dnode(&dn, inode, NULL, NULL, 0);
-		err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+		err = f2fs_get_block_locked(&dn, folio->index);
+	} else {
+		err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
 		f2fs_put_dnode(&dn);
+		if (f2fs_is_pinned_file(inode) &&
+		    !__is_valid_data_blkaddr(dn.data_blkaddr))
+			err = -EIO;
 	}
-#endif
+
 	if (err) {
-		unlock_page(page);
+		folio_unlock(folio);
 		goto out_sem;
 	}
 
-	f2fs_wait_on_page_writeback(page, DATA, false, true);
+	f2fs_folio_wait_writeback(folio, DATA, false, true);
 
 	/* wait for GCed page writeback via META_MAPPING */
 	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
@@ -141,18 +139,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	/*
 	 * check to see if the page is mapped already (no holes)
 	 */
-	if (PageMappedToDisk(page))
+	if (folio_test_mappedtodisk(folio))
 		goto out_sem;
 
 	/* page is wholly or partially inside EOF */
-	if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
+	if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
 						i_size_read(inode)) {
 		loff_t offset;
 
 		offset = i_size_read(inode) & ~PAGE_MASK;
-		zero_user_segment(page, offset, PAGE_SIZE);
+		folio_zero_segment(folio, offset, folio_size(folio));
 	}
-	set_page_dirty(page);
+	folio_mark_dirty(folio);
 
 	f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
 	f2fs_update_time(sbi, REQ_TIME);
@@ -164,7 +162,7 @@ out_sem:
 out:
 	ret = vmf_fs_error(err);
 
-	trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret);
+	trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
 
 	return ret;
 }
@@ -186,7 +184,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
 	if (!dentry)
 		return 0;
 
-	*pino = parent_ino(dentry);
+	*pino = d_parent_ino(dentry);
 	dput(dentry);
 	return 1;
 }
@@ -219,18 +217,22 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
 			f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
 							TRANS_DIR_INO))
 		cp_reason = CP_RECOVER_DIR;
+	else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
+							XATTR_DIR_INO))
+		cp_reason = CP_XATTR_DIR;
 
 	return cp_reason;
 }
 
 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
 {
-	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
+	struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino);
 	bool ret = false;
 
 	/* But we need to avoid that there are some inode updates */
-	if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
+	if ((!IS_ERR(i) && folio_test_dirty(i)) ||
+			f2fs_need_inode_block_update(sbi, ino))
 		ret = true;
-	f2fs_put_page(i, 0);
+	f2fs_folio_put(i, false);
 	return ret;
 }
@@ -259,7 +261,6 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
-		.for_reclaim = 0,
 	};
 	unsigned int seq_id = 0;
 
@@ -374,8 +375,7 @@ sync_nodes:
 	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
 	clear_inode_flag(inode, FI_APPEND_WRITE);
 flush_out:
-	if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ||
-	    (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi)))
+	if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
 		ret = f2fs_issue_flush(sbi, inode->i_ino);
 	if (!ret) {
 		f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
@@ -403,7 +403,7 @@ static bool __found_offset(struct address_space *mapping,
 	bool compressed_cluster = false;
 
 	if (f2fs_compressed_file(inode)) {
-		block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
+		block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio,
 			ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
 
 		compressed_cluster = first_blkaddr == COMPRESS_ADDR;
@@ -432,7 +432,7 @@ static bool __found_offset(struct address_space *mapping,
 static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
-	loff_t maxbytes = inode->i_sb->s_maxbytes;
+	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
 	struct dnode_of_data dn;
 	pgoff_t pgofs, end_offset;
 	loff_t data_ofs = offset;
@@ -473,7 +473,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 			}
 		}
 
-		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 
 		/* find data/hole in dnode block */
 		for (; dn.ofs_in_node < end_offset;
@@ -514,10 +514,7 @@ fail:
 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
-	loff_t maxbytes = inode->i_sb->s_maxbytes;
-
-	if (f2fs_compressed_file(inode))
-		maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
 
 	switch (whence) {
 	case SEEK_SET:
@@ -555,6 +552,45 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int finish_preallocate_blocks(struct inode *inode)
+{
+	int ret = 0;
+	bool opened;
+
+	f2fs_down_read(&F2FS_I(inode)->i_sem);
+	opened = is_inode_flag_set(inode, FI_OPENED_FILE);
+	f2fs_up_read(&F2FS_I(inode)->i_sem);
+	if (opened)
+		return 0;
+
+	inode_lock(inode);
+	if (is_inode_flag_set(inode, FI_OPENED_FILE))
+		goto out_unlock;
+
+	if (!file_should_truncate(inode))
+		goto out_update;
+
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	filemap_invalidate_lock(inode->i_mapping);
+
+	truncate_setsize(inode, i_size_read(inode));
+	ret = f2fs_truncate(inode);
+
+	filemap_invalidate_unlock(inode->i_mapping);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	if (ret)
+		goto out_unlock;
+
+	file_dont_truncate(inode);
+out_update:
+	f2fs_down_write(&F2FS_I(inode)->i_sem);
+	set_inode_flag(inode, FI_OPENED_FILE);
+	f2fs_up_write(&F2FS_I(inode)->i_sem);
+out_unlock:
+	inode_unlock(inode);
+	return ret;
+}
+
 static int f2fs_file_open(struct inode *inode, struct file *filp)
 {
 	int err = fscrypt_file_open(inode, filp);
@@ -569,10 +605,14 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
 	if (err)
 		return err;
 
-	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+	filp->f_mode |= FMODE_NOWAIT;
 	filp->f_mode |= FMODE_CAN_ODIRECT;
 
-	return dquot_file_open(inode, filp);
+	err = dquot_file_open(inode, filp);
+	if (err)
+		return err;
+
+	return finish_preallocate_blocks(inode);
 }
 
 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -584,8 +624,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 	int cluster_index = 0, valid_blocks = 0;
 	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
 	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
+	block_t blkstart;
+	int blklen = 0;
 
-	addr = get_dnode_addr(dn->inode, dn->node_page) + ofs;
+	addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs;
+	blkstart = le32_to_cpu(*addr);
 
 	/* Assumption: truncation starts with cluster */
 	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
@@ -601,26 +644,44 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 		}
 
 		if (blkaddr == NULL_ADDR)
-			continue;
+			goto next;
 
 		f2fs_set_data_blkaddr(dn, NULL_ADDR);
 
 		if (__is_valid_data_blkaddr(blkaddr)) {
 			if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
-				continue;
+				goto next;
 			if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
 						DATA_GENERIC_ENHANCE))
-				continue;
+				goto next;
 			if (compressed_cluster)
 				valid_blocks++;
 		}
 
-		f2fs_invalidate_blocks(sbi, blkaddr);
+		if (blkstart + blklen == blkaddr) {
+			blklen++;
+		} else {
+			f2fs_invalidate_blocks(sbi, blkstart, blklen);
+			blkstart = blkaddr;
+			blklen = 1;
+		}
 
 		if (!released || blkaddr != COMPRESS_ADDR)
 			nr_free++;
+
+		continue;
+
+next:
+		if (blklen)
+			f2fs_invalidate_blocks(sbi, blkstart, blklen);
+
+		blkstart = le32_to_cpu(*(addr + 1));
+		blklen = 0;
 	}
 
+	if (blklen)
+		f2fs_invalidate_blocks(sbi, blkstart, blklen);
+
 	if (compressed_cluster)
 		f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
@@ -630,7 +691,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 	 * once we invalidate valid blkaddr in range [ofs, ofs + count],
	 * we will invalidate all blkaddr in the whole range.
 	 */
-	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
+	fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page),
 							dn->inode) + ofs;
 	f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
 	f2fs_update_age_extent_cache_range(dn, fofs, len);
@@ -649,31 +710,33 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
 	loff_t offset = from & (PAGE_SIZE - 1);
 	pgoff_t index = from >> PAGE_SHIFT;
 	struct address_space *mapping = inode->i_mapping;
-	struct page *page;
+	struct folio *folio;
 
 	if (!offset && !cache_only)
 		return 0;
 
 	if (cache_only) {
-		page = find_lock_page(mapping, index);
-		if (page && PageUptodate(page))
+		folio = filemap_lock_folio(mapping, index);
+		if (IS_ERR(folio))
+			return 0;
+		if (folio_test_uptodate(folio))
 			goto truncate_out;
-		f2fs_put_page(page, 1);
+		f2fs_folio_put(folio, true);
 		return 0;
 	}
 
-	page = f2fs_get_lock_data_page(inode, index, true);
-	if (IS_ERR(page))
-		return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
+	folio = f2fs_get_lock_data_folio(inode, index, true);
+	if (IS_ERR(folio))
+		return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio);
truncate_out:
-	f2fs_wait_on_page_writeback(page, DATA, true, true);
-	zero_user(page, offset, PAGE_SIZE - offset);
+	f2fs_folio_wait_writeback(folio, DATA, true, true);
+	folio_zero_segment(folio, offset, folio_size(folio));
 
 	/* An encrypted inode should have a key and truncate the last page. */
 	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
 	if (!cache_only)
-		set_page_dirty(page);
-	f2fs_put_page(page, 1);
+		folio_mark_dirty(folio);
+	f2fs_folio_put(folio, true);
 	return 0;
 }
@@ -683,11 +746,16 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
 	struct dnode_of_data dn;
 	pgoff_t free_from;
 	int count = 0, err = 0;
-	struct page *ipage;
+	struct folio *ifolio;
 	bool truncate_page = false;
 
 	trace_f2fs_truncate_blocks_enter(inode, from);
 
+	if (IS_DEVICE_ALIASING(inode) && from) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
 	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
 
 	if (free_from >= max_file_blocks(inode))
@@ -696,20 +764,33 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
 	if (lock)
 		f2fs_lock_op(sbi);
 
-	ipage = f2fs_get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(ipage)) {
-		err = PTR_ERR(ipage);
+	ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
+	if (IS_ERR(ifolio)) {
+		err = PTR_ERR(ifolio);
+		goto out;
+	}
+
+	if (IS_DEVICE_ALIASING(inode)) {
+		struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
+		struct extent_info ei = et->largest;
+
+		f2fs_invalidate_blocks(sbi, ei.blk, ei.len);
+
+		dec_valid_block_count(sbi, inode, ei.len);
+		f2fs_update_time(sbi, REQ_TIME);
+
+		f2fs_folio_put(ifolio, true);
 		goto out;
 	}
 
 	if (f2fs_has_inline_data(inode)) {
-		f2fs_truncate_inline_inode(inode, ipage, from);
-		f2fs_put_page(ipage, 1);
+		f2fs_truncate_inline_inode(inode, ifolio, from);
+		f2fs_folio_put(ifolio, true);
 		truncate_page = true;
 		goto out;
 	}
 
-	set_new_dnode(&dn, inode, ipage, NULL, 0);
+	set_new_dnode(&dn, inode, ifolio, NULL, 0);
 	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
 	if (err) {
 		if (err == -ENOENT)
@@ -717,12 +798,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
 		goto out;
 	}
 
-	count = ADDRS_PER_PAGE(dn.node_page, inode);
+	count = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 
 	count -= dn.ofs_in_node;
 	f2fs_bug_on(sbi, count < 0);
 
-	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
+	if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) {
 		f2fs_truncate_data_blocks_range(&dn, count);
 		free_from += count;
 	}
@@ -737,7 +818,7 @@ free_partial:
 	/* lastly zero out the first data page */
 	if (!err)
 		err = truncate_partial_data_page(inode, from, truncate_page);
-
+out_err:
 	trace_f2fs_truncate_blocks_exit(inode, err);
 	return err;
 }
@@ -826,6 +907,12 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
 		return true;
 	if (f2fs_compressed_file(inode))
 		return true;
+	/*
+	 * only force direct read to use buffered IO, for direct write,
+	 * it expects inline data conversion before committing IO.
+	 */
+	if (f2fs_has_inline_data(inode) && rw == READ)
+		return true;
 
 	/* disallow direct IO if any of devices has unaligned blksize */
 	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
@@ -834,7 +921,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
 	 * for blkzoned device, fallback direct IO to buffered IO, so
 	 * all IOs can be serialized by log-structured write.
 	 */
-	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE))
+	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
+	    !f2fs_is_pinned_file(inode))
 		return true;
 	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 		return true;
@@ -923,10 +1011,8 @@ static void __setattr_copy(struct mnt_idmap *idmap,
 		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
-		vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
 
-		if (!vfsgid_in_group_p(vfsgid) &&
-		    !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+		if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
 			mode &= ~S_ISGID;
 		set_acl_inode(inode, mode);
 	}
@@ -939,6 +1025,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		struct iattr *attr)
 {
 	struct inode *inode = d_inode(dentry);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	int err;
 
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
@@ -952,9 +1039,15 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 			ATTR_GID | ATTR_TIMES_SET))))
 		return -EPERM;
 
-	if ((attr->ia_valid & ATTR_SIZE) &&
-		!f2fs_is_compress_backend_ready(inode))
-		return -EOPNOTSUPP;
+	if ((attr->ia_valid & ATTR_SIZE)) {
+		if (!f2fs_is_compress_backend_ready(inode) ||
+				IS_DEVICE_ALIASING(inode))
+			return -EOPNOTSUPP;
+		if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
+			!IS_ALIGNED(attr->ia_size,
+			F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
+			return -EINVAL;
+	}
 
 	err = setattr_prepare(idmap, dentry, attr);
 	if (err)
@@ -1006,7 +1099,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 			return err;
 		}
 
-		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		/*
+		 * wait for inflight dio, blocks should be removed after
+		 * IO completion.
+		 */
+		if (attr->ia_size < old_size)
+			inode_dio_wait(inode);
+
+		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
 		filemap_invalidate_lock(inode->i_mapping);
 
 		truncate_setsize(inode, attr->ia_size);
@@ -1018,14 +1118,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		 * larger than i_size.
 		 */
 		filemap_invalidate_unlock(inode->i_mapping);
-		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 		if (err)
 			return err;
 
-		spin_lock(&F2FS_I(inode)->i_size_lock);
+		spin_lock(&fi->i_size_lock);
 		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-		F2FS_I(inode)->last_disk_size = i_size_read(inode);
-		spin_unlock(&F2FS_I(inode)->i_size_lock);
+		fi->last_disk_size = i_size_read(inode);
+		spin_unlock(&fi->i_size_lock);
 	}
 
 	__setattr_copy(idmap, inode, attr);
@@ -1035,7 +1135,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 
 	if (is_inode_flag_set(inode, FI_ACL_MODE)) {
 		if (!err)
-			inode->i_mode = F2FS_I(inode)->i_acl_mode;
+			inode->i_mode = fi->i_acl_mode;
 		clear_inode_flag(inode, FI_ACL_MODE);
 	}
 }
@@ -1064,7 +1164,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
 					loff_t start, loff_t len)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct page *page;
+	struct folio *folio;
 
 	if (!len)
 		return 0;
@@ -1072,16 +1172,16 @@ static int fill_zero(struct inode *inode, pgoff_t index,
 	f2fs_balance_fs(sbi, true);
 
 	f2fs_lock_op(sbi);
-	page = f2fs_get_new_data_page(inode, NULL, index, false);
+	folio = f2fs_get_new_data_folio(inode, NULL, index, false);
 	f2fs_unlock_op(sbi);
 
-	if (IS_ERR(page))
-		return PTR_ERR(page);
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
 
-	f2fs_wait_on_page_writeback(page, DATA, true, true);
-	zero_user(page, start, len);
-	set_page_dirty(page);
-	f2fs_put_page(page, 1);
+	f2fs_folio_wait_writeback(folio, DATA, true, true);
+	folio_zero_range(folio, start, len);
+	folio_mark_dirty(folio);
+	f2fs_folio_put(folio, true);
 	return 0;
 }
@@ -1104,7 +1204,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
 			return err;
 		}
 
-		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 		count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
 
 		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -1199,7 +1299,7 @@ next_dnode:
 		goto next;
 	}
 
-	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
+	done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) -
 							dn.ofs_in_node, len);
 	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
 		*blkaddr = f2fs_data_blkaddr(&dn);
@@ -1247,7 +1347,7 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
 		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
 		if (ret) {
 			dec_valid_block_count(sbi, inode, 1);
-			f2fs_invalidate_blocks(sbi, *blkaddr);
+			f2fs_invalidate_blocks(sbi, *blkaddr, 1);
 		} else {
 			f2fs_update_data_blkaddr(&dn, *blkaddr);
 		}
@@ -1288,7 +1388,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
 			}
 
 			ilen = min((pgoff_t)
-				ADDRS_PER_PAGE(dn.node_page, dst_inode) -
+				ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) -
 				dn.ofs_in_node, len - i);
 			do {
 				dn.data_blkaddr = f2fs_data_blkaddr(&dn);
@@ -1313,23 +1413,26 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
 
 			f2fs_put_dnode(&dn);
 		} else {
-			struct page *psrc, *pdst;
+			struct folio *fsrc, *fdst;
 
-			psrc = f2fs_get_lock_data_page(src_inode,
+			fsrc = f2fs_get_lock_data_folio(src_inode,
 							src + i, true);
-			if (IS_ERR(psrc))
-				return PTR_ERR(psrc);
-			pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
+			if (IS_ERR(fsrc))
+				return PTR_ERR(fsrc);
+			fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i,
 								true);
-			if (IS_ERR(pdst)) {
-				f2fs_put_page(psrc, 1);
-				return PTR_ERR(pdst);
+			if (IS_ERR(fdst)) {
+				f2fs_folio_put(fsrc, true);
+				return PTR_ERR(fdst);
 			}
-			memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
-			set_page_dirty(pdst);
-			set_page_private_gcing(pdst);
-			f2fs_put_page(pdst, 1);
-			f2fs_put_page(psrc, 1);
+
+			f2fs_folio_wait_writeback(fdst, DATA, true, true);
+
+			memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
+			folio_mark_dirty(fdst);
+			set_page_private_gcing(&fdst->page);
+			f2fs_folio_put(fdst, true);
+			f2fs_folio_put(fsrc, true);
 
 			ret = f2fs_truncate_hole(src_inode,
 						src + i, src + i + 1);
@@ -1496,7 +1599,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
 			break;
 		}
 
-		f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
+		f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
 		f2fs_set_data_blkaddr(dn, NEW_ADDR);
 	}
 
@@ -1575,7 +1678,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 				goto out;
 			}
 
-			end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+			end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 			end = min(pg_end, end_offset - dn.ofs_in_node + index);
 
 			ret = f2fs_do_zero_range(&dn, index, end);
@@ -1736,17 +1839,32 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
 		map.m_len = sec_blks;
next_alloc:
-		if (has_not_enough_free_secs(sbi, 0,
+		f2fs_down_write(&sbi->pin_sem);
+
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+			if (has_not_enough_free_secs(sbi, 0, 0)) {
+				f2fs_up_write(&sbi->pin_sem);
+				err = -ENOSPC;
+				f2fs_warn_ratelimited(sbi,
+					"ino:%lu, start:%lu, end:%lu, need to trigger GC to "
+					"reclaim enough free segment when checkpoint is enabled",
+					inode->i_ino, pg_start, pg_end);
+				goto out_err;
+			}
+		}
+
+		if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
+			ZONED_PIN_SEC_REQUIRED_COUNT :
 			GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
 			f2fs_down_write(&sbi->gc_lock);
 			stat_inc_gc_call_count(sbi, FOREGROUND);
 			err = f2fs_gc(sbi, &gc_control);
-			if (err && err != -ENODATA)
+			if (err && err != -ENODATA) {
+				f2fs_up_write(&sbi->pin_sem);
 				goto out_err;
+			}
 		}
 
-		f2fs_down_write(&sbi->pin_sem);
-
 		err = f2fs_allocate_pinning_section(sbi);
 		if (err) {
 			f2fs_up_write(&sbi->pin_sem);
@@ -1806,7 +1924,7 @@ static long f2fs_fallocate(struct file *file, int mode,
 		return -EIO;
 	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
 		return -ENOSPC;
-	if (!f2fs_is_compress_backend_ready(inode))
+	if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
 		return -EOPNOTSUPP;
 
 	/* f2fs only support ->fallocate for regular file */
@@ -1817,15 +1935,6 @@ static long f2fs_fallocate(struct file *file, int mode,
 			(mode & (FALLOC_FL_COLLAPSE_RANGE |
 				FALLOC_FL_INSERT_RANGE)))
 		return -EOPNOTSUPP;
 
-	/*
-	 * Pinned file should not support partial truncation since the block
-	 * can be used by applications.
-	 */
-	if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
-		(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
-			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
-		return -EOPNOTSUPP;
-
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
 			FALLOC_FL_INSERT_RANGE))
@@ -1833,10 +1942,27 @@ static long f2fs_fallocate(struct file *file, int mode,
 
 	inode_lock(inode);
 
+	/*
+	 * Pinned file should not support partial truncation since the block
+	 * can be used by applications.
+	 */
+	if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
+		(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
+			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	ret = file_modified(file);
 	if (ret)
 		goto out;
 
+	/*
+	 * wait for inflight dio, blocks should be removed after IO
+	 * completion.
+	 */
+	inode_dio_wait(inode);
+
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		if (offset >= inode->i_size)
 			goto out;
@@ -1938,15 +2064,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
 			if (err)
 				return err;
 
-			f2fs_down_write(&F2FS_I(inode)->i_sem);
+			f2fs_down_write(&fi->i_sem);
 			if (!f2fs_may_compress(inode) ||
 					(S_ISREG(inode->i_mode) &&
 					F2FS_HAS_BLOCKS(inode))) {
-				f2fs_up_write(&F2FS_I(inode)->i_sem);
+				f2fs_up_write(&fi->i_sem);
 				return -EINVAL;
 			}
 			err = set_compress_context(inode);
-			f2fs_up_write(&F2FS_I(inode)->i_sem);
+			f2fs_up_write(&fi->i_sem);
 			if (err)
 				return err;
@@ -2065,10 +2191,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 	struct mnt_idmap *idmap = file_mnt_idmap(filp);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct inode *pinode;
 	loff_t isize;
 	int ret;
 
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
 	if (!inode_owner_or_capable(idmap, inode))
 		return -EACCES;
 
@@ -2098,6 +2226,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 		goto out;
 
 	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&fi->i_gc_rwsem[READ]);
 
 	/*
 	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2107,37 +2236,33 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
 			  inode->i_ino, get_dirty_pages(inode));
 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
-	if (ret) {
-		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-		goto out;
-	}
+	if (ret)
+		goto out_unlock;
 
 	/* Check if the inode already has a COW inode */
 	if (fi->cow_inode == NULL) {
 		/* Create a COW inode for atomic write */
-		pinode = f2fs_iget(inode->i_sb, fi->i_pino);
-		if (IS_ERR(pinode)) {
-			f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-			ret = PTR_ERR(pinode);
-			goto out;
-		}
+		struct dentry *dentry = file_dentry(filp);
+		struct inode *dir = d_inode(dentry->d_parent);
 
-		ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode);
-		iput(pinode);
-		if (ret) {
-			f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-			goto out;
-		}
+		ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
+		if (ret)
+			goto out_unlock;
 
 		set_inode_flag(fi->cow_inode, FI_COW_FILE);
 		clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
+
+		/* Set the COW inode's atomic_inode to the atomic inode */
+		F2FS_I(fi->cow_inode)->atomic_inode = inode;
 	} else {
 		/* Reuse the already created COW inode */
+		f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));
+
+		invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
+
 		ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
-		if (ret) {
-			f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
-			goto out;
-		}
+		if (ret)
+			goto out_unlock;
 	}
 
 	f2fs_write_inode(inode, NULL);
@@ -2156,7 +2281,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 	}
 
 	f2fs_i_size_write(fi->cow_inode, isize);
+out_unlock:
+	f2fs_up_write(&fi->i_gc_rwsem[READ]);
 	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+	if (ret)
+		goto out;
 
 	f2fs_update_time(sbi, REQ_TIME);
 	fi->atomic_write_task = current;
@@ -2174,6 +2303,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 	struct mnt_idmap *idmap = file_mnt_idmap(filp);
 	int ret;
 
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
 	if (!inode_owner_or_capable(idmap, inode))
 		return -EACCES;
 
@@ -2206,6 +2338,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
 	struct mnt_idmap *idmap = file_mnt_idmap(filp);
 	int ret;
 
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
 	if (!inode_owner_or_capable(idmap, inode))
 		return -EACCES;
 
@@ -2224,34 +2359,13 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp)
 	return ret;
 }
 
-static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
+int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
+						bool readonly, bool need_lock)
 {
-	struct inode *inode = file_inode(filp);
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct super_block *sb = sbi->sb;
-	__u32 in;
 	int ret = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (get_user(in, (__u32 __user *)arg))
-		return -EFAULT;
-
-	if (in != F2FS_GOING_DOWN_FULLSYNC) {
-		ret = mnt_want_write_file(filp);
-		if (ret) {
-			if (ret == -EROFS) {
-				ret = 0;
-				f2fs_stop_checkpoint(sbi, false,
-						STOP_CP_REASON_SHUTDOWN);
-				trace_f2fs_shutdown(sbi, in, ret);
-			}
-			return ret;
-		}
-	}
-
-	switch (in) {
+	switch (flag) {
 	case F2FS_GOING_DOWN_FULLSYNC:
 		ret = bdev_freeze(sb->s_bdev);
 		if (ret)
@@ -2290,18 +2404,120 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
 		goto out;
 	}
 
+	if (readonly)
+		goto out;
+
+	/*
+	 * grab sb->s_umount to avoid racing w/ remount() and other shutdown
+	 * paths.
+	 */
+	if (need_lock)
+		down_write(&sbi->sb->s_umount);
+
 	f2fs_stop_gc_thread(sbi);
 	f2fs_stop_discard_thread(sbi);
 	f2fs_drop_discard_cmd(sbi);
 	clear_opt(sbi, DISCARD);
 
+	if (need_lock)
+		up_write(&sbi->sb->s_umount);
+
 	f2fs_update_time(sbi, REQ_TIME);
out:
-	if (in != F2FS_GOING_DOWN_FULLSYNC)
+
+	trace_f2fs_shutdown(sbi, flag, ret);
+
+	return ret;
+}
+
+static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	__u32 in;
+	int ret;
+	bool need_drop = false, readonly = false;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (get_user(in, (__u32 __user *)arg))
+		return -EFAULT;
+
+	if (in != F2FS_GOING_DOWN_FULLSYNC) {
+		ret = mnt_want_write_file(filp);
+		if (ret) {
+			if (ret != -EROFS)
+				return ret;
+
+			/* fallback to nosync shutdown for readonly fs */
+			in = F2FS_GOING_DOWN_NOSYNC;
+			readonly = true;
+		} else {
+			need_drop = true;
+		}
+	}
+
+	ret = f2fs_do_shutdown(sbi, in, readonly, true);
+
+	if (need_drop)
 		mnt_drop_write_file(filp);
-	trace_f2fs_shutdown(sbi, in, ret);
+	return ret;
+}
+
+static int f2fs_keep_noreuse_range(struct inode *inode,
+				loff_t offset, loff_t len)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
+	u64 start, end;
+	int ret = 0;
+
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+
+	if (offset >= max_bytes || len > max_bytes ||
+	    (offset + len) > max_bytes)
+		return 0;
+
+	start = offset >> PAGE_SHIFT;
+	end = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+
+	inode_lock(inode);
+	if (f2fs_is_atomic_file(inode)) {
+		inode_unlock(inode);
+		return 0;
+	}
+
+	spin_lock(&sbi->inode_lock[DONATE_INODE]);
+	/* let's remove the range, if len = 0 */
+	if (!len) {
+		if (!list_empty(&F2FS_I(inode)->gdonate_list)) {
+			list_del_init(&F2FS_I(inode)->gdonate_list);
+			sbi->donate_files--;
+			if (is_inode_flag_set(inode, FI_DONATE_FINISHED))
+				ret = -EALREADY;
+			else
+				set_inode_flag(inode, FI_DONATE_FINISHED);
+		} else
+			ret = -ENOENT;
+	} else {
+		if (list_empty(&F2FS_I(inode)->gdonate_list)) {
+			list_add_tail(&F2FS_I(inode)->gdonate_list,
+					&sbi->inode_list[DONATE_INODE]);
+			sbi->donate_files++;
+		} else {
+			list_move_tail(&F2FS_I(inode)->gdonate_list,
+					&sbi->inode_list[DONATE_INODE]);
+		}
+		F2FS_I(inode)->donate_start = start;
+		F2FS_I(inode)->donate_end = end - 1;
+		clear_inode_flag(inode, FI_DONATE_FINISHED);
+	}
+	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+	inode_unlock(inode);
 
 	return ret;
 }
@@ -2354,13 +2570,14 @@ static bool uuid_is_nonzero(__u8 u[16])
 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
+	int ret;
 
 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
 		return -EOPNOTSUPP;
 
+	ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
-
-	return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
+	return ret;
 }
 
 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
@@ -2607,14 +2824,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	bool fragmented = false;
 	int err;
 
-	pg_start = range->start >> PAGE_SHIFT;
-	pg_end = (range->start + range->len) >> PAGE_SHIFT;
-
 	f2fs_balance_fs(sbi, true);
 
 	inode_lock(inode);
+	pg_start = range->start >> PAGE_SHIFT;
+	pg_end = min_t(pgoff_t,
+		(range->start + range->len) >> PAGE_SHIFT,
+		DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
 
-	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
+		f2fs_is_atomic_file(inode)) {
 		err = -EINVAL;
 		goto unlock_out;
 	}
@@ -2627,8 +2846,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	}
 
 	/* writeback all dirty pages in the range */
-	err = filemap_write_and_wait_range(inode->i_mapping, range->start,
-						range->start + range->len - 1);
+	err = filemap_write_and_wait_range(inode->i_mapping,
+						pg_start << PAGE_SHIFT,
+						(pg_end << PAGE_SHIFT) - 1);
 	if (err)
 		goto out;
 
@@ -2637,7 +2857,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	 * block addresses are continuous.
 	 */
 	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
-		if (ei.fofs + ei.len >= pg_end)
+		if ((pgoff_t)ei.fofs + ei.len >= pg_end)
 			goto out;
 	}
 
@@ -2712,17 +2932,19 @@ do_map:
 		idx = map.m_lblk;
 		while (idx < map.m_lblk + map.m_len &&
 						cnt < BLKS_PER_SEG(sbi)) {
-			struct page *page;
+			struct folio *folio;
 
-			page = f2fs_get_lock_data_page(inode, idx, true);
-			if (IS_ERR(page)) {
-				err = PTR_ERR(page);
+			folio = f2fs_get_lock_data_folio(inode, idx, true);
+			if (IS_ERR(folio)) {
+				err = PTR_ERR(folio);
 				goto clear_out;
 			}
 
-			set_page_dirty(page);
-			set_page_private_gcing(page);
-			f2fs_put_page(page, 1);
+			f2fs_folio_wait_writeback(folio, DATA, true, true);
+
+			folio_mark_dirty(folio);
+			set_page_private_gcing(&folio->page);
+			f2fs_folio_put(folio, true);
 
 			idx++;
 			cnt++;
@@ -2761,7 +2983,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
+	if (!S_ISREG(inode->i_mode))
 		return -EINVAL;
 
 	if (f2fs_readonly(sbi->sb))
@@ -2786,7 +3008,8 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
 	err = f2fs_defragment_range(sbi, filp, &range);
 	mnt_drop_write_file(filp);
 
-	f2fs_update_time(sbi, REQ_TIME);
+	if (range.len)
+		f2fs_update_time(sbi, REQ_TIME);
 	if (err < 0)
 		return err;
@@ -2837,11 +3060,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 		goto out;
 	}
 
-	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) {
+	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
+		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
 		ret = -EOPNOTSUPP;
 		goto out_unlock;
 	}
 
+	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	ret = -EINVAL;
 	if (pos_in + len > src->i_size || pos_in + len < pos_in)
 		goto out_unlock;
@@ -2893,9 +3122,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 	}
 
 	f2fs_lock_op(sbi);
-	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
-				pos_out >> F2FS_BLKSIZE_BITS,
-				len >> F2FS_BLKSIZE_BITS, false);
+	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
+				F2FS_BYTES_TO_BLK(pos_out),
+				F2FS_BYTES_TO_BLK(len), false);
 
 	if (!ret) {
 		if (dst_max_i_size)
@@ -2931,32 +3160,27 @@ out:
 static int __f2fs_ioc_move_range(struct file *filp,
 				struct f2fs_move_range *range)
 {
-	struct fd dst;
 	int err;
 
 	if (!(filp->f_mode & FMODE_READ) ||
 			!(filp->f_mode & FMODE_WRITE))
 		return -EBADF;
 
-	dst = fdget(range->dst_fd);
-	if (!dst.file)
+	CLASS(fd, dst)(range->dst_fd);
+	if (fd_empty(dst))
 		return -EBADF;
 
-	if (!(dst.file->f_mode & FMODE_WRITE)) {
-		err = -EBADF;
-		goto err_out;
-	}
+	if (!(fd_file(dst)->f_mode & FMODE_WRITE))
		return -EBADF;
 
 	err = mnt_want_write_file(filp);
 	if (err)
-		goto err_out;
+		return err;
 
-	err = f2fs_move_file_range(filp, range->pos_in, dst.file,
+	err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
					range->pos_out, range->len);
 
 	mnt_drop_write_file(filp);
-err_out:
-	fdput(dst);
 	return err;
 }
@@ -3189,18 +3413,20 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
-	/* Use i_gc_failures for normal file as a risk signal. */
-	if (inc)
-		f2fs_i_gc_failures_write(inode,
-				fi->i_gc_failures[GC_FAILURE_PIN] + 1);
+	if (IS_DEVICE_ALIASING(inode))
+		return -EINVAL;
 
-	if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
+	if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
 		f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
-			  __func__, inode->i_ino,
-			  fi->i_gc_failures[GC_FAILURE_PIN]);
+			  __func__, inode->i_ino, fi->i_gc_failures);
 		clear_inode_flag(inode, FI_PIN_FILE);
 		return -EAGAIN;
 	}
+
+	/* Use i_gc_failures for normal file as a risk signal. */
+	if (inc)
+		f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
+
 	return 0;
 }
@@ -3220,12 +3446,20 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (!pin && IS_DEVICE_ALIASING(inode))
+		return -EOPNOTSUPP;
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
 
 	inode_lock(inode);
 
+	if (f2fs_is_atomic_file(inode)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (!pin) {
 		clear_inode_flag(inode, FI_PIN_FILE);
 		f2fs_i_gc_failures_write(inode, 0);
@@ -3234,7 +3468,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
 		goto done;
 	}
 
-	if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
+	if (F2FS_HAS_BLOCKS(inode)) {
 		ret = -EFBIG;
 		goto out;
 	}
@@ -3261,7 +3495,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
 	}
 
 	set_inode_flag(inode, FI_PIN_FILE);
-	ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
+	ret = F2FS_I(inode)->i_gc_failures;
done:
 	f2fs_update_time(sbi, REQ_TIME);
out:
@@ -3276,10 +3510,33 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
 	__u32 pin = 0;
 
 	if (is_inode_flag_set(inode, FI_PIN_FILE))
-		pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
+		pin = F2FS_I(inode)->i_gc_failures;
 	return put_user(pin, (u32 __user *)arg);
 }
 
+static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
+{
+	return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
+			(u32 __user *)arg);
+}
+
+static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	__u32 level;
+
+	if (get_user(level, (__u32 __user *)arg))
+		return -EFAULT;
+
+	if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX)
+		return -EINVAL;
+
+	inode_lock(inode);
+	F2FS_I(inode)->ioprio_hint = level;
+	inode_unlock(inode);
+	return 0;
+}
+
 int f2fs_precache_extents(struct inode *inode)
 {
 	struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -3466,7 +3723,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
 	int i;
 
 	for (i = 0; i < count; i++) {
-		blkaddr = data_blkaddr(dn->inode, dn->node_page,
+		blkaddr = data_blkaddr(dn->inode, dn->node_folio,
 					dn->ofs_in_node + i);
 
 		if (!__is_valid_data_blkaddr(blkaddr))
@@ -3513,6 +3770,7 @@ next:
 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	pgoff_t page_idx = 0, last_idx;
 	unsigned int released_blocks = 0;
@@ -3522,9 +3780,6 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	if (!f2fs_sb_has_compression(sbi))
 		return -EOPNOTSUPP;
 
-	if (!f2fs_compressed_file(inode))
-		return -EINVAL;
-
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
@@ -3543,7 +3798,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 		goto out;
 	}
 
-	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+	if (!f2fs_compressed_file(inode) ||
+		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -3552,7 +3808,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	if (ret)
 		goto out;
 
-	if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+	if (!atomic_read(&fi->i_compr_blocks)) {
 		ret = -EPERM;
 		goto out;
 	}
@@ -3561,7 +3817,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	inode_set_ctime_current(inode);
 	f2fs_mark_inode_dirty_sync(inode, true);
 
-	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3570,9 +3826,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 		struct dnode_of_data dn;
 		pgoff_t end_offset, count;
 
+		f2fs_lock_op(sbi);
+
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
 		if (ret) {
+			f2fs_unlock_op(sbi);
 			if (ret == -ENOENT) {
 				page_idx = f2fs_get_next_page_offset(&dn,
								page_idx);
@@ -3582,14 +3841,16 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 			break;
 		}
 
-		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
-		count = round_up(count, F2FS_I(inode)->i_cluster_size);
+		count = round_up(count, fi->i_cluster_size);
 
 		ret = release_compress_blocks(&dn, count);
 
 		f2fs_put_dnode(&dn);
 
+		f2fs_unlock_op(sbi);
+
 		if (ret < 0)
 			break;
@@ -3598,8 +3859,10 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	}
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
out:
+	if (released_blocks)
+		f2fs_update_time(sbi, REQ_TIME);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
@@ -3607,14 +3870,14 @@ out:
 	if (ret >= 0) {
 		ret = put_user(released_blocks, (u64 __user *)arg);
 	} else if (released_blocks &&
-			atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+			atomic_read(&fi->i_compr_blocks)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
 			"iblocks=%llu, released=%u, compr_blocks=%u, "
 			"run fsck to fix.",
 			__func__, inode->i_ino, inode->i_blocks,
 			released_blocks,
-			atomic_read(&F2FS_I(inode)->i_compr_blocks));
+			atomic_read(&fi->i_compr_blocks));
 	}
 
 	return ret;
@@ -3629,7 +3892,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
 	int i;
 
 	for (i = 0; i < count; i++) {
-		blkaddr = data_blkaddr(dn->inode, dn->node_page,
+		blkaddr = data_blkaddr(dn->inode, dn->node_folio,
 					dn->ofs_in_node + i);
 
 		if (!__is_valid_data_blkaddr(blkaddr))
@@ -3641,11 +3904,12 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
 
 	while (count) {
 		int compr_blocks = 0;
-		blkcnt_t reserved;
+		blkcnt_t reserved = 0;
+		blkcnt_t to_reserved;
 		int ret;
 
 		for (i = 0; i < cluster_size; i++) {
-			blkaddr = data_blkaddr(dn->inode, dn->node_page,
+			blkaddr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);
 
 			if (i == 0) {
@@ -3661,20 +3925,26 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
 			 * fails in release_compress_blocks(), so NEW_ADDR
			 * is a possible case.
			 */
-			if (blkaddr == NEW_ADDR ||
-				__is_valid_data_blkaddr(blkaddr)) {
+			if (blkaddr == NEW_ADDR) {
+				reserved++;
+				continue;
+			}
+			if (__is_valid_data_blkaddr(blkaddr)) {
				compr_blocks++;
				continue;
			}
 		}
 
-		reserved = cluster_size - compr_blocks;
+		to_reserved = cluster_size - compr_blocks - reserved;
 
 		/* for the case all blocks in cluster were reserved */
-		if (reserved == 1)
+		if (reserved && to_reserved == 1) {
+			dn->ofs_in_node += cluster_size;
 			goto next;
+		}
 
-		ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+		ret = inc_valid_block_count(sbi, dn->inode,
						&to_reserved, false);
 		if (unlikely(ret))
 			return ret;
@@ -3685,7 +3955,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
 
 		f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
 
-		*reserved_blocks += reserved;
+		*reserved_blocks += to_reserved;
next:
 		count -= cluster_size;
 	}
@@ -3696,6 +3966,7 @@ next:
 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	pgoff_t page_idx = 0, last_idx;
 	unsigned int reserved_blocks = 0;
@@ -3704,9 +3975,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 	if (!f2fs_sb_has_compression(sbi))
 		return -EOPNOTSUPP;
 
-	if (!f2fs_compressed_file(inode))
-		return -EINVAL;
-
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
@@ -3718,15 +3986,16 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 
 	inode_lock(inode);
 
-	if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+	if (!f2fs_compressed_file(inode) ||
+		!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
 		ret = -EINVAL;
 		goto unlock_inode;
 	}
 
-	if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+	if (atomic_read(&fi->i_compr_blocks))
 		goto unlock_inode;
 
-	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3735,9 +4004,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 		struct dnode_of_data dn;
 		pgoff_t end_offset, count;
 
+		f2fs_lock_op(sbi);
+
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
 		if (ret) {
+			f2fs_unlock_op(sbi);
 			if (ret == -ENOENT) {
 				page_idx = f2fs_get_next_page_offset(&dn,
								page_idx);
@@ -3747,14 +4019,16 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 			break;
 		}
 
-		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
-		count = round_up(count, F2FS_I(inode)->i_cluster_size);
+		count = round_up(count, fi->i_cluster_size);
 
 		ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
 
 		f2fs_put_dnode(&dn);
 
+		f2fs_unlock_op(sbi);
+
 		if (ret < 0)
 			break;
@@ -3762,7 +4036,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 	}
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 
 	if (!ret) {
 		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3770,20 +4044,22 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 		f2fs_mark_inode_dirty_sync(inode, true);
 	}
unlock_inode:
+	if (reserved_blocks)
+		f2fs_update_time(sbi, REQ_TIME);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 
 	if (!ret) {
 		ret = put_user(reserved_blocks, (u64 __user *)arg);
 	} else if (reserved_blocks &&
-			atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+			atomic_read(&fi->i_compr_blocks)) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
-		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
			"iblocks=%llu, reserved=%u, compr_blocks=%u, "
			"run fsck to fix.",
			__func__, inode->i_ino, inode->i_blocks,
			reserved_blocks,
-			atomic_read(&F2FS_I(inode)->i_compr_blocks));
+			atomic_read(&fi->i_compr_blocks));
 	}
 
 	return ret;
@@ -3846,7 +4122,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
 			IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
 		return -EOPNOTSUPP;
 
-	file_start_write(filp);
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
 	inode_lock(inode);
 
 	if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
@@ -3905,7 +4183,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
 			goto out;
 		}
 
-		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+		end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
 		count = min(end_offset - dn.ofs_in_node, pg_end - index);
 		for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
 			struct block_device *cur_bdev;
@@ -3966,12 +4244,13 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
 	if (len)
 		ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
				prev_block, len, range.flags);
+	f2fs_update_time(sbi, REQ_TIME);
out:
 	filemap_invalidate_unlock(mapping);
 	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
 	inode_unlock(inode);
-	file_end_write(filp);
+	mnt_drop_write_file(filp);
 
 	return ret;
 }
@@ -4006,6 +4285,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct f2fs_comp_option option;
 	int ret = 0;
@@ -4025,7 +4305,9 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
 			option.algorithm >= COMPRESS_MAX)
 		return -EINVAL;
 
-	file_start_write(filp);
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
 	inode_lock(inode);
 	f2fs_down_write(&F2FS_I(inode)->i_sem);
@@ -4044,27 +4326,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
 		goto out;
 	}
 
-	F2FS_I(inode)->i_compress_algorithm = option.algorithm;
-	F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
-	F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
+	fi->i_compress_algorithm = option.algorithm;
+	fi->i_log_cluster_size = option.log_cluster_size;
+	fi->i_cluster_size = BIT(option.log_cluster_size);
 	/* Set default level */
-	if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD)
-		F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+	if (fi->i_compress_algorithm == COMPRESS_ZSTD)
+		fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
 	else
-		F2FS_I(inode)->i_compress_level = 0;
+		fi->i_compress_level = 0;
 	/* Adjust mount option level */
 	if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
 	    F2FS_OPTION(sbi).compress_level)
-		F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level;
+		fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
 	f2fs_mark_inode_dirty_sync(inode, true);
 
 	if (!f2fs_is_compress_backend_ready(inode))
 		f2fs_warn(sbi, "compression algorithm is successfully set, "
			"but current kernel doesn't support this algorithm.");
out:
-	f2fs_up_write(&F2FS_I(inode)->i_sem);
+	f2fs_up_write(&fi->i_sem);
 	inode_unlock(inode);
-	file_end_write(filp);
+	mnt_drop_write_file(filp);
 
 	return ret;
 }
@@ -4073,32 +4355,36 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
 {
 	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
 	struct address_space *mapping = inode->i_mapping;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t redirty_idx = page_idx;
-	int i, page_len = 0, ret = 0;
+	int page_len = 0, ret = 0;
 
 	page_cache_ra_unbounded(&ractl, len, 0);
 
-	for (i = 0; i < len; i++, page_idx++) {
-		page = read_cache_page(mapping, page_idx, NULL, NULL);
-		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
+	do {
+		folio = read_cache_folio(mapping, page_idx, NULL, NULL);
+		if (IS_ERR(folio)) {
+			ret = PTR_ERR(folio);
 			break;
 		}
-		page_len++;
-	}
+		page_len += folio_nr_pages(folio) - (page_idx - folio->index);
+		page_idx = folio_next_index(folio);
+	} while (page_len < len);
 
-	for (i = 0; i < page_len; i++, redirty_idx++) {
-		page = find_lock_page(mapping, redirty_idx);
+	do {
+		folio = filemap_lock_folio(mapping, redirty_idx);
 
-		/* It will never fail, when page has pinned above */
-		f2fs_bug_on(F2FS_I_SB(inode), !page);
+		/* It will never fail, when folio has pinned above */
+		f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio));
 
-		set_page_dirty(page);
-		set_page_private_gcing(page);
-		f2fs_put_page(page, 1);
-		f2fs_put_page(page, 0);
-	}
+		f2fs_folio_wait_writeback(folio, DATA, true, true);
+
+		folio_mark_dirty(folio);
+		set_page_private_gcing(&folio->page);
+		redirty_idx = folio_next_index(folio);
+		folio_unlock(folio);
+		folio_put_refs(folio, 2);
+	} while (redirty_idx < page_idx);
 
 	return ret;
 }
@@ -4108,9 +4394,8 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 	struct inode *inode = file_inode(filp);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
-	pgoff_t page_idx = 0, last_idx;
-	int cluster_size = fi->i_cluster_size;
-	int count, ret;
+	pgoff_t page_idx = 0, last_idx, cluster_idx;
+	int ret;
 
 	if (!f2fs_sb_has_compression(sbi) ||
			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
@@ -4119,12 +4404,11 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 	if (!(filp->f_mode & FMODE_WRITE))
 		return -EBADF;
 
-	if (!f2fs_compressed_file(inode))
-		return -EINVAL;
-
 	f2fs_balance_fs(sbi, true);
 
-	file_start_write(filp);
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
 	inode_lock(inode);
 
 	if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4132,7 +4416,8 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 		goto out;
 	}
 
-	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+	if (!f2fs_compressed_file(inode) ||
+		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4145,10 +4430,15 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 		goto out;
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	last_idx >>= fi->i_log_cluster_size;
+
+	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
+		page_idx = cluster_idx << fi->i_log_cluster_size;
 
-	count = last_idx - page_idx;
-	while (count && count >= cluster_size) {
-		ret = redirty_blocks(inode, page_idx, cluster_size);
+		if (!f2fs_is_compressed_cluster(inode, page_idx))
+			continue;
+
+		ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
 		if (ret < 0)
 			break;
@@ -4158,9 +4448,6 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 			break;
 		}
 
-		count -= cluster_size;
-		page_idx += cluster_size;
-
 		cond_resched();
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
@@ -4175,9 +4462,10 @@ static int f2fs_ioc_decompress_file(struct file *filp)
 	if (ret)
 		f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
			  __func__, ret);
+	f2fs_update_time(sbi, REQ_TIME);
out:
 	inode_unlock(inode);
-	file_end_write(filp);
+	mnt_drop_write_file(filp);
 
 	return ret;
 }
@@ -4186,9 +4474,9 @@ static int f2fs_ioc_compress_file(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	pgoff_t page_idx = 0, last_idx;
-	int cluster_size = F2FS_I(inode)->i_cluster_size;
-	int count, ret;
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	pgoff_t page_idx = 0, last_idx, cluster_idx;
+	int ret;
 
 	if (!f2fs_sb_has_compression(sbi) ||
			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
@@ -4197,12 +4485,11 @@ static int f2fs_ioc_compress_file(struct file *filp)
 	if (!(filp->f_mode & FMODE_WRITE))
 		return -EBADF;
 
-	if (!f2fs_compressed_file(inode))
-		return -EINVAL;
-
 	f2fs_balance_fs(sbi, true);
 
-	file_start_write(filp);
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
 	inode_lock(inode);
 
 	if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4210,7 +4497,8 @@ static int f2fs_ioc_compress_file(struct file *filp)
 		goto out;
 	}
 
-	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+	if (!f2fs_compressed_file(inode) ||
+		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4222,10 +4510,15 @@ static int f2fs_ioc_compress_file(struct file *filp)
 	set_inode_flag(inode, FI_ENABLE_COMPRESS);
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	last_idx >>= fi->i_log_cluster_size;
+
+	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
+		page_idx = cluster_idx << fi->i_log_cluster_size;
+
+		if (f2fs_is_sparse_cluster(inode, page_idx))
+			continue;
 
-	count = last_idx - page_idx;
-	while (count && count >= cluster_size) {
-		ret = redirty_blocks(inode, page_idx, cluster_size);
+		ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
 		if (ret < 0)
 			break;
@@ -4235,9 +4528,6 @@ static int f2fs_ioc_compress_file(struct file *filp)
 			break;
 		}
 
-		count -= cluster_size;
-		page_idx += cluster_size;
-
 		cond_resched();
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
@@ -4254,9 +4544,10 @@ static int f2fs_ioc_compress_file(struct file *filp)
 	if (ret)
 		f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
			  __func__, ret);
+	f2fs_update_time(sbi, REQ_TIME);
out:
 	inode_unlock(inode);
-	file_end_write(filp);
+	mnt_drop_write_file(filp);
 
 	return ret;
 }
@@ -4347,6 +4638,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_ioc_decompress_file(filp);
 	case F2FS_IOC_COMPRESS_FILE:
 		return f2fs_ioc_compress_file(filp);
+	case F2FS_IOC_GET_DEV_ALIAS_FILE:
+		return f2fs_ioc_get_dev_alias_file(filp, arg);
+	case F2FS_IOC_IO_PRIO:
+		return f2fs_ioc_io_prio(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -4437,6 +4732,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		f2fs_down_read(&fi->i_gc_rwsem[READ]);
 	}
 
+	/* dio is not compatible w/ atomic file */
+	if (f2fs_is_atomic_file(inode)) {
+		f2fs_up_read(&fi->i_gc_rwsem[READ]);
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	/*
 	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	 * the higher-level function iomap_dio_rw() in order to ensure that the
@@ -4496,6 +4798,11 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
					iov_iter_count(to), READ);
 
+	/* In LFS mode, if there is inflight dio, wait for its completion */
+	if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
+			get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE))
+		inode_dio_wait(inode);
+
 	if (f2fs_should_use_dio(inode, iocb, to)) {
 		ret = f2fs_dio_read_iter(iocb, to);
 	} else {
@@ -4610,9 +4917,11 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
 	else
 		return 0;
 
-	map.m_may_create = true;
+	if (!IS_DEVICE_ALIASING(inode))
+		map.m_may_create = true;
 	if (dio) {
-		map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+		map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
						inode->i_write_hint);
 		flag = F2FS_GET_BLOCK_PRE_DIO;
 	} else {
 		map.m_seg_type = NO_CHECK_TYPE;
@@ -4660,8 +4969,21 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
 	return 0;
 }
 
+static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
+					struct bio *bio, loff_t file_offset)
+{
+	struct inode *inode = iter->inode;
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
+	enum temp_type temp = f2fs_get_segment_temp(sbi, type);
+
+	bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
+	submit_bio(bio);
+}
+
 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
-	.end_io = f2fs_dio_write_end_io,
+	.end_io		= f2fs_dio_write_end_io,
+	.submit_io	= f2fs_dio_write_submit_io,
 };
 
 static void f2fs_flush_buffered_write(struct address_space *mapping,
@@ -4798,6 +5120,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	bool dio;
 	bool may_need_sync = true;
 	int preallocated;
+	const loff_t pos = iocb->ki_pos;
+	const ssize_t count = iov_iter_count(from);
 	ssize_t ret;
 
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4819,6 +5143,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		inode_lock(inode);
 	}
 
+	if (f2fs_is_pinned_file(inode) &&
+			!f2fs_overwrite_io(inode, pos, count)) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
 	ret = f2fs_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out_unlock;
@@ -4826,6 +5156,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	/* Determine whether we will do a direct write or a buffered write. */
 	dio = f2fs_should_use_dio(inode, iocb, from);
 
+	/* dio is not compatible w/ atomic write */
+	if (dio && f2fs_is_atomic_file(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
 	/* Possibly preallocate the blocks for the write. */
 	target_size = iocb->ki_pos + iov_iter_count(from);
 	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
@@ -4906,11 +5242,15 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
 	}
 
 	err = generic_fadvise(filp, offset, len, advice);
-	if (!err && advice == POSIX_FADV_DONTNEED &&
-	    test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
-	    f2fs_compressed_file(inode))
-		f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+	if (err)
+		return err;
 
+	if (advice == POSIX_FADV_DONTNEED &&
+	    (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
+	     f2fs_compressed_file(inode)))
+		f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+	else if (advice == POSIX_FADV_NOREUSE)
+		err = f2fs_keep_noreuse_range(inode, offset, len);
 	return err;
 }
@@ -5019,6 +5359,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_SET_COMPRESS_OPTION:
 	case F2FS_IOC_DECOMPRESS_FILE:
 	case F2FS_IOC_COMPRESS_FILE:
+	case F2FS_IOC_GET_DEV_ALIAS_FILE:
+	case F2FS_IOC_IO_PRIO:
 		break;
 	default:
 		return -ENOIOCTLCMD;
@@ -5045,4 +5387,5 @@ const struct file_operations f2fs_file_operations = {
 	.splice_read	= f2fs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fadvise	= f2fs_file_fadvise,
+	.fop_flags	= FOP_BUFFER_RASYNC,
 };
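
For reference, the two ioctls this diff wires into __f2fs_ioctl() (F2FS_IOC_GET_DEV_ALIAS_FILE and F2FS_IOC_IO_PRIO) are plain put_user()/get_user() interfaces, so they can be exercised from userspace with a few lines of C. The sketch below is illustrative, not part of the patch; it assumes a uapi header (<linux/f2fs.h> in recent kernels) that exports both request macros, matching f2fs_ioc_get_dev_alias_file() and f2fs_ioc_io_prio() above.

/*
 * Minimal userspace sketch (not part of this diff): query the
 * device-aliasing flag and set an I/O priority hint on an f2fs file.
 * Assumes <linux/f2fs.h> defines F2FS_IOC_GET_DEV_ALIAS_FILE and
 * F2FS_IOC_IO_PRIO.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/f2fs.h>

int main(int argc, char **argv)
{
	__u32 aliased = 0;
	__u32 level = 0;	/* kernel rejects level >= F2FS_IOPRIO_MAX */
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDWR);
	if (fd < 0)
		return 1;

	/* f2fs_ioc_get_dev_alias_file() put_user()s 1 or 0 */
	if (ioctl(fd, F2FS_IOC_GET_DEV_ALIAS_FILE, &aliased) == 0)
		printf("device aliasing file: %u\n", aliased);

	/* f2fs_ioc_io_prio() get_user()s the level and stores it as a hint */
	if (ioctl(fd, F2FS_IOC_IO_PRIO, &level) != 0)
		perror("F2FS_IOC_IO_PRIO");

	close(fd);
	return 0;
}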