diff options
Diffstat (limited to 'fs/f2fs/file.c')
-rw-r--r-- | fs/f2fs/file.c | 452 |
1 files changed, 352 insertions, 100 deletions
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 517e112c8a9a..6b94f19b3fa8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -53,6 +53,11 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct dnode_of_data dn; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -114,6 +119,7 @@ out_sem: out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } @@ -138,27 +144,33 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } -static inline bool need_do_checkpoint(struct inode *inode) +static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool need_cp = false; + enum cp_reason_type cp_reason = CP_NO_NEEDED; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; + if (!S_ISREG(inode->i_mode)) + cp_reason = CP_NON_REGULAR; + else if (inode->i_nlink != 1) + cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) - need_cp = true; + cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) - need_cp = true; + cp_reason = CP_WRONG_PINO; else if (!space_for_roll_forward(sbi)) - need_cp = true; + cp_reason = CP_NO_SPC_ROLL; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; + cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) - need_cp = true; - else if (sbi->active_logs == 2) - need_cp = true; + cp_reason = CP_FASTBOOT_MODE; + else if (F2FS_OPTION(sbi).active_logs == 2) + cp_reason = CP_SPEC_LOG_NUM; + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && + exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) + cp_reason = CP_RECOVER_DIR; - return need_cp; + return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -193,7 +205,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; - bool need_cp = false; + enum cp_reason_type cp_reason = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, @@ -212,7 +224,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, clear_inode_flag(inode, FI_NEED_IPU); if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } @@ -243,10 +255,10 @@ go_write: * sudden-power-off. */ down_read(&F2FS_I(inode)->i_sem); - need_cp = need_do_checkpoint(inode); + cp_reason = need_do_checkpoint(inode); up_read(&F2FS_I(inode)->i_sem); - if (need_cp) { + if (cp_reason) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); @@ -294,37 +306,43 @@ sync_nodes: remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(inode, FI_UPDATE_WRITE); if (!atomic) - ret = f2fs_issue_flush(sbi); + ret = f2fs_issue_flush(sbi, inode->i_ino); + if (!ret) { + remove_ino_entry(sbi, ino, UPDATE_INO); + clear_inode_flag(inode, FI_UPDATE_WRITE); + remove_ino_entry(sbi, ino, FLUSH_INO); + } f2fs_update_time(sbi, REQ_TIME); out: - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { - struct pagevec pvec; + struct page *page; int nr_pages; if (whence != SEEK_DATA) return 0; /* find first dirty page index */ - pagevec_init(&pvec, 0); - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, - PAGECACHE_TAG_DIRTY, 1); - pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX; - pagevec_release(&pvec); + nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY, + 1, &page); + if (!nr_pages) + return ULONG_MAX; + pgofs = page->index; + put_page(page); return pgofs; } @@ -443,6 +461,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -455,26 +476,17 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - struct dentry *dir; + int err = fscrypt_file_open(inode, filp); + + if (err) + return err; + + filp->f_mode |= FMODE_NOWAIT; - if (f2fs_encrypted_inode(inode)) { - int ret = fscrypt_get_encryption_info(inode); - if (ret) - return -EACCES; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - dir = dget_parent(file_dentry(filp)); - if (f2fs_encrypted_inode(d_inode(dir)) && - !fscrypt_has_permitted_context(d_inode(dir), inode)) { - dput(dir); - return -EPERM; - } - dput(dir); return dquot_file_open(inode, filp); } -int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; @@ -517,7 +529,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); - return nr_free; } void truncate_data_blocks(struct dnode_of_data *dn) @@ -562,7 +573,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -571,7 +581,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; @@ -629,6 +639,9 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; @@ -662,8 +675,17 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri; unsigned int flags; + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_inode_crtime(inode->i_sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = fi->i_crtime.tv_sec; + stat->btime.tv_nsec = fi->i_crtime.tv_nsec; + } + flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); if (flags & FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; @@ -683,6 +705,12 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, STATX_ATTR_NODUMP); generic_fillattr(inode, stat); + + /* we need to show initial sectors used for inline_data/dentries */ + if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || + f2fs_has_inline_dentry(inode)) + stat->blocks += (stat->size + 511) >> 9; + return 0; } @@ -722,10 +750,17 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) int err; bool size_changed = false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + err = setattr_prepare(dentry, attr); if (err) return err; + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -741,14 +776,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - if (attr->ia_size <= i_size_read(inode)) { down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); @@ -774,6 +801,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_mtime = inode->i_ctime = current_time(inode); } + down_write(&F2FS_I(inode)->i_sem); + F2FS_I(inode)->last_disk_size = i_size_read(inode); + up_write(&F2FS_I(inode)->i_sem); + size_changed = true; } @@ -844,7 +875,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start++; + pg_start = get_next_page_offset(&dn, pg_start); continue; } return err; @@ -1081,11 +1112,13 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1153,17 +1186,20 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; + goto out_unlock; truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - goto out; + goto out_unlock; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1175,9 +1211,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = truncate_blocks(inode, new_size, true); if (!ret) f2fs_i_size_write(inode, new_size); - -out: +out_unlock: up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1315,8 +1351,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } out_sem: up_write(&F2FS_I(inode)->i_mmap_sem); @@ -1348,6 +1388,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1387,6 +1430,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_i_size_write(inode, new_size); out: up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1394,7 +1438,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_pgofs = NULL, + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -1434,8 +1479,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } return err; } @@ -1446,6 +1495,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -1479,8 +1531,6 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); - if (mode & FALLOC_FL_KEEP_SIZE) - file_set_keep_isize(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1668,6 +1718,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; @@ -1686,6 +1738,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1805,14 +1858,20 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: sb = freeze_bdev(sb->s_bdev); - if (sb && !IS_ERR(sb)) { + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + goto out; + } + if (sb) { f2fs_stop_checkpoint(sbi, false); thaw_bdev(sb->s_bdev, sb); } break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ - f2fs_sync_fs(sb, 1); + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_NOSYNC: @@ -1826,6 +1885,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = -EINVAL; goto out; } + + stop_gc_thread(sbi); + stop_discard_thread(sbi); + + drop_discard_cmd(sbi); + clear_opt(sbi, DISCARD); + f2fs_update_time(sbi, REQ_TIME); out: mnt_drop_write_file(filp); @@ -1882,6 +1948,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + if (!f2fs_sb_has_encrypt(inode->i_sb)) + return -EOPNOTSUPP; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); @@ -1889,6 +1958,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1898,16 +1969,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1915,15 +1988,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -1984,8 +2058,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2032,9 +2108,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_extent = NULL, + .m_seg_type = NO_CHECK_TYPE }; struct extent_info ei = {0,0,0}; - pgoff_t pg_start, pg_end; + pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; @@ -2042,7 +2119,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update_policy(inode, NULL)) + if (should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2068,6 +2145,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = pg_start; + map.m_next_pgofs = &next_pgofs; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -2081,14 +2159,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } - if (blk_end && blk_end != map.m_pblk) { + if (blk_end && blk_end != map.m_pblk) fragmented = true; - break; - } + + /* record total count of block that we're going to move */ + total += map.m_len; + blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; @@ -2097,10 +2177,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; - - sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -2112,6 +2189,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; } + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + total = 0; + while (map.m_lblk < pg_end) { pgoff_t idx; int cnt = 0; @@ -2123,7 +2204,7 @@ do_map: goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } @@ -2244,9 +2325,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); + down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { - if (!inode_trylock(dst)) { - ret = -EBUSY; + ret = -EBUSY; + if (!inode_trylock(dst)) + goto out; + if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { + inode_unlock(dst); goto out; } } @@ -2306,9 +2391,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_unlock_op(sbi); out_unlock: - if (src != dst) + if (src != dst) { + up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); inode_unlock(dst); + } out: + up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2622,8 +2710,130 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return 0; } +int f2fs_pin_file_control(struct inode *inode, bool inc) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + + if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: Enable GC = ino %lx after %x GC trials\n", + __func__, inode->i_ino, fi->i_gc_failures); + clear_inode_flag(inode, FI_PIN_FILE); + return -EAGAIN; + } + return 0; +} + +static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin; + int ret = 0; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(pin, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + if (should_update_outplace(inode, NULL)) { + ret = -EINVAL; + goto out; + } + + if (!pin) { + clear_inode_flag(inode, FI_PIN_FILE); + F2FS_I(inode)->i_gc_failures = 1; + goto done; + } + + if (f2fs_pin_file_control(inode, false)) { + ret = -EAGAIN; + goto out; + } + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; + + set_inode_flag(inode, FI_PIN_FILE); + ret = F2FS_I(inode)->i_gc_failures; +done: + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin = 0; + + if (is_inode_flag_set(inode, FI_PIN_FILE)) + pin = F2FS_I(inode)->i_gc_failures; + return put_user(pin, (u32 __user *)arg); +} + +int f2fs_precache_extents(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_map_blocks map; + pgoff_t m_next_extent; + loff_t end; + int err; + + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return -EOPNOTSUPP; + + map.m_lblk = 0; + map.m_next_pgofs = NULL; + map.m_next_extent = &m_next_extent; + map.m_seg_type = NO_CHECK_TYPE; + end = F2FS_I_SB(inode)->max_file_blocks; + + while (map.m_lblk < end) { + map.m_len = end - map.m_lblk; + + down_write(&fi->dio_rwsem[WRITE]); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); + up_write(&fi->dio_rwsem[WRITE]); + if (err) + return err; + + map.m_lblk = m_next_extent; + } + + return err; +} + +static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) +{ + return f2fs_precache_extents(file_inode(filp)); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2669,6 +2879,12 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_fsgetxattr(filp, arg); case F2FS_IOC_FSSETXATTR: return f2fs_ioc_fssetxattr(filp, arg); + case F2FS_IOC_GET_PIN_FILE: + return f2fs_ioc_get_pin_file(filp, arg); + case F2FS_IOC_SET_PIN_FILE: + return f2fs_ioc_set_pin_file(filp, arg); + case F2FS_IOC_PRECACHE_EXTENTS: + return f2fs_ioc_precache_extents(filp, arg); default: return -ENOTTY; } @@ -2681,24 +2897,57 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; - inode_lock(inode); + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - inode_unlock(inode); - return err; + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } @@ -2740,6 +2989,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_FEATURES: case F2FS_IOC_FSGETXATTR: case F2FS_IOC_FSSETXATTR: + case F2FS_IOC_GET_PIN_FILE: + case F2FS_IOC_SET_PIN_FILE: + case F2FS_IOC_PRECACHE_EXTENTS: break; default: return -ENOIOCTLCMD; |