diff options
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/Kconfig | 3 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 8 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 107 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 252 | ||||
-rw-r--r-- | fs/f2fs/data.c | 837 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 124 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 178 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 176 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 617 | ||||
-rw-r--r-- | fs/f2fs/file.c | 846 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 297 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 35 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 100 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 153 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 136 | ||||
-rw-r--r-- | fs/f2fs/node.c | 128 | ||||
-rw-r--r-- | fs/f2fs/node.h | 4 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 76 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 984 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 144 | ||||
-rw-r--r-- | fs/f2fs/super.c | 523 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 182 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 29 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 14 |
24 files changed, 3618 insertions, 2335 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 68a1e23e1557..5916a02fb46d 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -4,8 +4,7 @@ config F2FS_FS depends on BLOCK select BUFFER_HEAD select NLS - select CRYPTO - select CRYPTO_CRC32 + select CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION select FS_IOMAP diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index ec2aeccb69a3..1fbc0607363b 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -219,8 +219,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; *mode_p = mode; return 0; @@ -297,9 +296,8 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, struct posix_acl *clone = NULL; if (acl) { - int size = sizeof(struct posix_acl) + acl->a_count * - sizeof(struct posix_acl_entry); - clone = kmemdup(acl, size, flags); + clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), + flags); if (clone) refcount_set(&clone->a_refcount, 1); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b0597a539fc5..efda9a022981 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -32,7 +32,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, f2fs_build_fault_attr(sbi, 0, 0); if (!end_io) f2fs_flush_merged_writes(sbi); - f2fs_handle_critical_error(sbi, reason, end_io); + f2fs_handle_critical_error(sbi, reason); } /* @@ -99,7 +99,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page->index, META); + f2fs_handle_page_eio(sbi, page_folio(page), META); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -154,49 +154,47 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, if (unlikely(f2fs_cp_error(sbi))) return exist; - if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return exist; - } + if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) || + (!exist && type == DATA_GENERIC_ENHANCE)) + goto out_err; + if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE) + goto out_handle; + return exist; - if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - dump_stack(); - } +out_err: + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + dump_stack(); +out_handle: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return exist; } -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (time_to_inject(sbi, FAULT_BLKADDR)) - return false; - switch (type) { case META_NAT: break; case META_SIT: if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) - return false; + goto check_only; break; case META_SSA: if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || blkaddr < SM_I(sbi)->ssa_blkaddr)) - return false; + goto check_only; break; case META_CP: if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || blkaddr < __start_cp_addr(sbi))) - return false; + goto check_only; break; case META_POR: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) - return false; + goto check_only; break; case DATA_GENERIC: case DATA_GENERIC_ENHANCE: @@ -213,7 +211,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); dump_stack(); - return false; + goto err; } else { return __is_bitmap_valid(sbi, blkaddr, type); } @@ -221,13 +219,31 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || blkaddr >= MAIN_BLKADDR(sbi))) - return false; + goto err; break; default: BUG(); } return true; +err: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); +check_only: + return false; +} + +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY)) + return false; + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); +} + +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); } /* @@ -329,30 +345,31 @@ static int __f2fs_write_meta_page(struct page *page, enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); - trace_f2fs_writepage(page, META); + trace_f2fs_writepage(folio, META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); + folio_unlock(folio); return 0; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) + if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - f2fs_do_write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); @@ -477,7 +494,7 @@ stop: static bool f2fs_dirty_meta_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, META); + trace_f2fs_set_page_dirty(folio, META); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -889,7 +906,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); - if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { + if (cp_blocks > BLKS_PER_SEG(sbi) || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; @@ -1170,6 +1187,11 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); } static bool __need_flush_quota(struct f2fs_sb_info *sbi) @@ -1324,7 +1346,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_UMOUNT) { if (le32_to_cpu(ckpt->cp_pack_total_block_count) + - NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) { + NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) { clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); f2fs_notice(sbi, "Disable nat_bits due to no space"); } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) && @@ -1527,10 +1549,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) cp_ver |= ((__u64)crc32 << 32); *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); - blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; + blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + - (i << F2FS_BLKSIZE_BITS), blk + i); + F2FS_BLK_TO_BYTES(i), blk + i); } /* write out checkpoint buffer at block 0 */ @@ -1559,11 +1581,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - percpu_counter_set(&sbi->rf_node_block_count, 0); - /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); /* Wait for all dirty meta pages to be submitted for IO */ @@ -1587,8 +1604,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) */ if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || f2fs_sb_has_compression(sbi)) - invalidate_mapping_pages(META_MAPPING(sbi), - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + f2fs_bug_on(sbi, + invalidate_inode_pages2_range(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1)); f2fs_release_ino_entry(sbi, false); @@ -1701,6 +1719,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } f2fs_restore_inmem_curseg(sbi); + f2fs_reinit_atgc_curseg(sbi); stat_inc_cp_count(sbi); stop: unblock_operations(sbi); @@ -1730,9 +1749,9 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) im->ino_num = 0; } - sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + sbi->max_orphans = (BLKS_PER_SEG(sbi) - F2FS_CP_PACKS - NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) * - F2FS_ORPHANS_PER_BLOCK; + F2FS_ORPHANS_PER_BLOCK; } int __init f2fs_create_checkpoint_caches(void) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 531517dac079..985690d81a82 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -90,11 +90,13 @@ bool f2fs_is_compressed_page(struct page *page) static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - attach_page_private(page, (void *)data); + struct folio *folio = page_folio(page); + + folio_attach_private(folio, (void *)data); /* i_crypto_info and iv index */ - page->index = index; - page->mapping = inode->i_mapping; + folio->index = index; + folio->mapping = inode->i_mapping; } static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) @@ -160,17 +162,17 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->cluster_idx = NULL_CLUSTER; } -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) { unsigned int cluster_ofs; - if (!f2fs_cluster_can_merge_page(cc, page->index)) + if (!f2fs_cluster_can_merge_page(cc, folio->index)) f2fs_bug_on(F2FS_I_SB(cc->inode), 1); - cluster_ofs = offset_in_cluster(cc, page->index); - cc->rpages[cluster_ofs] = page; + cluster_ofs = offset_in_cluster(cc, folio->index); + cc->rpages[cluster_ofs] = folio_page(folio, 0); cc->nr_rpages++; - cc->cluster_idx = cluster_idx(cc, page->index); + cc->cluster_idx = cluster_idx(cc, folio->index); } #ifdef CONFIG_F2FS_FS_LZO @@ -198,8 +200,8 @@ static int lzo_compress_pages(struct compress_ctx *cc) ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo compress failed, ret:%d", ret); return -EIO; } return 0; @@ -212,17 +214,15 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lzo invalid rlen:%zu, expected:%lu", + dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -294,16 +294,15 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, ret, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lz4 invalid ret:%d, expected:%lu", + ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -350,9 +349,8 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) stream = zstd_init_cstream(¶ms, 0, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_init_cstream failed", __func__); kvfree(workspace); return -EIO; } @@ -390,16 +388,16 @@ static int zstd_compress_pages(struct compress_ctx *cc) ret = zstd_compress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_compress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } ret = zstd_end_stream(stream, &outbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_end_stream returned %d", __func__, zstd_get_error_code(ret)); return -EIO; } @@ -432,9 +430,8 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, - __func__); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s zstd_init_dstream failed", __func__); kvfree(workspace); return -EIO; } @@ -469,16 +466,15 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -512,8 +508,8 @@ static int lzorle_compress_pages(struct compress_ctx *cc) ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo-rle compress failed, ret:%d", ret); return -EIO; } return 0; @@ -780,9 +776,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) if (provided != calculated) { if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT); - printk_ratelimited( - "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x", - KERN_INFO, sbi->sb->s_id, dic->inode->i_ino, + f2fs_info_ratelimited(sbi, + "checksum invalid, nid = %lu, %x vs %x", + dic->inode->i_ino, provided, calculated); } set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -850,7 +846,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate) { - unsigned long pgidx = pages[index]->index; + unsigned long pgidx = page_folio(pages[index])->index; int i = uptodate ? 0 : 1; /* @@ -864,9 +860,11 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, return false; for (; i < cc->cluster_size; i++) { - if (pages[index + i]->index != pgidx + i) + struct folio *folio = page_folio(pages[index + i]); + + if (folio->index != pgidx + i) return false; - if (uptodate && !PageUptodate(pages[index + i])) + if (uptodate && !folio_test_uptodate(folio)) return false; } @@ -885,7 +883,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ - if (page->index >= nr_pages) + if (page_folio(page)->index >= nr_pages) return true; } return false; @@ -951,7 +949,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; int count, i; - for (i = 1, count = 1; i < cluster_size; i++) { + for (i = 0, count = 0; i < cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -962,8 +960,8 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, return count; } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr_blks) +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) { struct dnode_of_data dn; unsigned int start_idx = cluster_idx << @@ -984,10 +982,12 @@ static int __f2fs_cluster_blocks(struct inode *inode, } if (dn.data_blkaddr == COMPRESS_ADDR) { - if (compr_blks) - ret = __f2fs_get_cluster_blocks(inode, &dn); - else + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -997,7 +997,16 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } /* return whether cluster is compressed one or not */ @@ -1005,7 +1014,16 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) @@ -1031,6 +1049,31 @@ static void set_cluster_writeback(struct compress_ctx *cc) } } +static void cancel_cluster_writeback(struct compress_ctx *cc, + struct compress_io_ctx *cic, int submitted) +{ + int i; + + /* Wait for submitted IOs. */ + if (submitted > 1) { + f2fs_submit_merged_write(F2FS_I_SB(cc->inode), DATA); + while (atomic_read(&cic->pending_pages) != + (cc->valid_nr_cpages - submitted + 1)) + f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + } + + /* Cancel writeback and stay locked. */ + for (i = 0; i < cc->cluster_size; i++) { + if (i < submitted) { + inode_inc_dirty_pages(cc->inode); + lock_page(cc->rpages[i]); + } + clear_page_private_gcing(cc->rpages[i]); + if (folio_test_writeback(page_folio(cc->rpages[i]))) + end_page_writeback(cc->rpages[i]); + } +} + static void set_cluster_dirty(struct compress_ctx *cc) { int i; @@ -1074,14 +1117,14 @@ retry: if (PageUptodate(page)) f2fs_put_page(page, 1); else - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); } if (!f2fs_cluster_is_empty(cc)) { struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false, true); + &last_block_in_bio, NULL, true); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc, true); if (ret) @@ -1104,7 +1147,7 @@ retry: } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); if (!PageUptodate(page)) { release_and_retry: @@ -1154,7 +1197,8 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, .cluster_size = F2FS_I(inode)->i_cluster_size, .rpages = fsdata, }; - bool first_index = (index == cc.rpages[0]->index); + struct folio *folio = page_folio(cc.rpages[0]); + bool first_index = (index == folio->index); if (copied) set_cluster_dirty(&cc); @@ -1198,13 +1242,14 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) int i; for (i = cluster_size - 1; i >= 0; i--) { - loff_t start = rpages[i]->index << PAGE_SHIFT; + struct folio *folio = page_folio(rpages[i]); + loff_t start = folio->index << PAGE_SHIFT; if (from <= start) { - zero_user_segment(rpages[i], 0, PAGE_SIZE); + folio_zero_segment(folio, 0, folio_size(folio)); } else { - zero_user_segment(rpages[i], from - start, - PAGE_SIZE); + folio_zero_segment(folio, from - start, + folio_size(folio)); break; } } @@ -1232,12 +1277,12 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = 0, .io_type = io_type, .io_wbc = wbc, .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? 1 : 0, }; + struct folio *folio; struct dnode_of_data dn; struct node_info ni; struct compress_io_ctx *cic; @@ -1249,7 +1294,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(cc->rpages[0]->mapping, -EIO); + mapping_set_error(inode->i_mapping, -EIO); goto out_free; } @@ -1276,7 +1321,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_put_dnode; } - psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + folio = page_folio(cc->rpages[last_index]); + psize = folio_pos(folio) + folio_size(folio); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) @@ -1299,7 +1345,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, cic); + page_folio(cc->rpages[i + 1])->index, cic); fio.compressed_page = cc->cpages[i]; fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, @@ -1334,7 +1380,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (blkaddr == COMPRESS_ADDR) fio.compr_blocks++; if (__is_valid_data_blkaddr(blkaddr)) - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); goto unlock_continue; } @@ -1344,7 +1390,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } goto unlock_continue; @@ -1358,7 +1404,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, fio.compressed_page = cc->cpages[i - 1]; cc->cpages[i - 1] = NULL; + fio.submitted = 0; f2fs_outplace_write_data(&dn, &fio); + if (unlikely(!fio.submitted)) { + cancel_cluster_writeback(cc, cic, i); + + /* To call fscrypt_finalize_bounce_page */ + i = cc->valid_nr_cpages; + *submitted = 0; + goto out_destroy_crypt; + } (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); @@ -1392,8 +1447,11 @@ unlock_continue: out_destroy_crypt: page_array_free(cc->inode, cic->rpages, cc->cluster_size); - for (--i; i >= 0; i--) + for (--i; i >= 0; i--) { + if (!cc->cpages[i]) + continue; fscrypt_finalize_bounce_page(&cc->cpages[i]); + } out_put_cic: kmem_cache_free(cic_entry_slab, cic); out_put_dnode: @@ -1418,6 +1476,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = (struct compress_io_ctx *)page_private(page); + enum count_type type = WB_DATA_TYPE(page, + f2fs_is_compressed_page(page)); int i; if (unlikely(bio->bi_status)) @@ -1425,7 +1485,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; @@ -1441,12 +1501,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1461,6 +1523,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; @@ -1476,7 +1542,7 @@ continue_unlock: if (!PageDirty(cc->rpages[i])) goto continue_unlock; - if (PageWriteback(cc->rpages[i])) { + if (folio_test_writeback(page_folio(cc->rpages[i]))) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true); @@ -1485,7 +1551,9 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + submitted = 0; + ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), + &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { @@ -1493,26 +1561,29 @@ continue_unlock: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; + goto out; f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -1806,16 +1877,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_page, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; @@ -1837,11 +1910,12 @@ struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) return sbi->compress_inode->i_mapping; } -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { if (!sbi->compress_inode) return; - invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr); + invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, @@ -1878,12 +1952,8 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, set_page_private_data(cpage, ino); - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) - goto out; - memcpy(page_address(cpage), page_address(page), PAGE_SIZE); SetPageUptodate(cpage); -out: f2fs_put_page(cpage, 1); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 26e317696b33..de4da6d9cd93 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/sched/mm.h> #include <linux/mpage.h> #include <linux/writeback.h> @@ -48,7 +47,7 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; @@ -65,17 +64,15 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; return false; } -static enum count_type __read_io_type(struct page *page) +static enum count_type __read_io_type(struct folio *folio) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio->mapping; if (mapping) { struct inode *inode = mapping->host; @@ -139,27 +136,22 @@ struct bio_post_read_ctx { */ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; struct bio_post_read_ctx *ctx = bio->bi_private; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) { + if (f2fs_is_compressed_page(&folio->page)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(page, true, 0, + f2fs_end_read_compressed_page(&folio->page, true, 0, in_task); - f2fs_put_page_dic(page, in_task); + f2fs_put_page_dic(&folio->page, in_task); continue; } - if (bio->bi_status) - ClearPageUptodate(page); - else - SetPageUptodate(page); - dec_page_count(F2FS_P_SB(page), __read_io_type(page)); - unlock_page(page); + dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); + folio_end_read(folio, bio->bi_status == 0); } if (ctx) @@ -338,18 +330,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); - - if (page_private_dummy(page)) { - clear_page_private_dummy(page); - unlock_page(page); - mempool_free(page, sbi->write_io_dummy); - - if (unlikely(bio->bi_status)) - f2fs_stop_checkpoint(sbi, true, - STOP_CP_REASON_WRITE_FAIL); - continue; - } + enum count_type type = WB_DATA_TYPE(page, false); fscrypt_finalize_bounce_page(&page); @@ -368,7 +349,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page->index != nid_of_node(page)); + page_folio(page)->index != nid_of_node(page)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) @@ -478,6 +459,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, + fio->type, fio->temp); } iostat_alloc_and_bind_ctx(sbi, bio, NULL); @@ -524,51 +507,10 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, submit_bio(bio); } -static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio) -{ - unsigned int start = - (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi); - - if (start == 0) - return; - - /* fill dummy pages */ - for (; start < F2FS_IO_SIZE(sbi); start++) { - struct page *page = - mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_NOFAIL); - f2fs_bug_on(sbi, !page); - - lock_page(page); - - zero_user_segment(page, 0, PAGE_SIZE); - set_page_private_dummy(page); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) - f2fs_bug_on(sbi, 1); - } -} - static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); - - if (type == DATA || type == NODE) { - if (f2fs_lfs_mode(sbi) && current->plug) - blk_finish_plug(current->plug); - - if (F2FS_IO_ALIGNED(sbi)) { - f2fs_align_write_bio(sbi, bio); - /* - * In the NODE case, we lose next block address chain. - * So, we need to do checkpoint in f2fs_sync_file. - */ - if (type == NODE) - set_sbi_flag(sbi, SBI_NEED_CP); - } - } - trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); @@ -643,17 +585,20 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) return -ENOMEM; for (j = HOT; j < n; j++) { - init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); - sbi->write_io[i][j].sbi = sbi; - sbi->write_io[i][j].bio = NULL; - spin_lock_init(&sbi->write_io[i][j].io_lock); - INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); - INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); + struct f2fs_bio_info *io = &sbi->write_io[i][j]; + + init_f2fs_rwsem(&io->io_rwsem); + io->sbi = sbi; + io->bio = NULL; + io->last_block_in_bio = 0; + spin_lock_init(&io->io_lock); + INIT_LIST_HEAD(&io->io_list); + INIT_LIST_HEAD(&io->bio_list); + init_f2fs_rwsem(&io->bio_list_lock); #ifdef CONFIG_BLK_DEV_ZONED - init_completion(&sbi->write_io[i][j].zone_wait); - sbi->write_io[i][j].zone_pending_bio = NULL; - sbi->write_io[i][j].bi_private = NULL; + init_completion(&io->zone_wait); + io->zone_pending_bio = NULL; + io->bi_private = NULL; #endif } } @@ -735,34 +680,29 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *fio_folio = page_folio(fio->page); + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio_folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? - META_GENERIC : DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(data_folio, fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - return -EFAULT; - } + f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host, + fio_folio->index, fio, GFP_NOIO); + bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0); if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -796,16 +736,6 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { - if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) { - unsigned int filled_blocks = - F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size); - unsigned int io_size = F2FS_IO_SIZE(sbi); - unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt; - - /* IOs in bio is aligned and left space of vectors is not enough */ - if (!(filled_blocks % io_size) && left_vecs < io_size) - return false; - } if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) return false; return io_type_is_mergeable(io, fio); @@ -860,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, fio->page->mapping->host, - fio->page->index, fio) && + page_folio(fio->page)->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; @@ -948,12 +878,10 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - } - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -962,7 +890,7 @@ alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { @@ -971,9 +899,10 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -984,6 +913,7 @@ alloc_new: #ifdef CONFIG_BLK_DEV_ZONED static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) { + struct block_device *bdev = sbi->sb->s_bdev; int devi = 0; if (f2fs_is_multi_device(sbi)) { @@ -994,8 +924,9 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) return false; } blkaddr -= FDEV(devi).start_blk; + bdev = FDEV(devi).bdev; } - return bdev_is_zoned(FDEV(devi).bdev) && + return bdev_is_zoned(bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } @@ -1007,11 +938,12 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); f2fs_down_write(&io->io_rwsem); - +next: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) { wait_for_completion_io(&io->zone_wait); @@ -1021,7 +953,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) } #endif -next: if (fio->in_list) { spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { @@ -1046,26 +977,20 @@ next: /* set submitted = true as a return value */ fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_page, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, - bio_page->index, fio))) + page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - if (F2FS_IO_ALIGNED(sbi) && - (fio->type == DATA || fio->type == NODE) && - fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); - fio->retry = 1; - goto skip; - } io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, - bio_page->index, fio, GFP_NOIO); + page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } @@ -1075,15 +1000,12 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_page_write(fio->page, fio); -skip: - if (fio->in_list) - goto next; -out: + trace_f2fs_submit_folio_write(page_folio(fio->page), fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1096,6 +1018,9 @@ out: __submit_merged_bio(io); } #endif + if (fio->in_list) + goto next; +out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); @@ -1151,7 +1076,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static int f2fs_submit_page_read(struct inode *inode, struct page *page, +static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { @@ -1159,14 +1084,14 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) { iostat_update_and_unbind_ctx(bio); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); @@ -1218,7 +1143,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, @@ -1285,8 +1211,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } goto got_it; @@ -1312,8 +1236,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } got_it: @@ -1337,7 +1259,7 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; @@ -1354,7 +1276,7 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; - page = find_get_page(mapping, index); + page = find_get_page_flags(mapping, index, FGP_ACCESSED); if (page && PageUptodate(page)) return page; f2fs_put_page(page, 0); @@ -1475,17 +1397,20 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { - err = inc_valid_block_count(sbi, dn->inode, &count); + err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; - f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL); + err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr, + &dn->data_blkaddr, &sum, seg_type, NULL); + if (err) + return err; + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); f2fs_update_data_blkaddr(dn, dn->data_blkaddr); return 0; @@ -1578,6 +1503,25 @@ static bool f2fs_map_blocks_cached(struct inode *inode, return true; } +static bool map_is_mergeable(struct f2fs_sb_info *sbi, + struct f2fs_map_blocks *map, + block_t blkaddr, int flag, int bidx, + int ofs) +{ + if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) + return false; + if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) + return true; + if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) + return true; + if (flag == F2FS_GET_BLOCK_PRE_DIO) + return true; + if (flag == F2FS_GET_BLOCK_DIO && + map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR) + return true; + return false; +} + /* * f2fs_map_blocks() tries to find or build mapping relationship which * maps continuous logical blocks to physical blocks, and return such @@ -1641,13 +1585,13 @@ next_block: if (!is_hole && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto sync_out; } /* use out-place-update for direct IO under LFS mode */ - if (map->m_may_create && - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { + if (map->m_may_create && (is_hole || + (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + !f2fs_is_pinned_file(inode)))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1700,6 +1644,10 @@ next_block: goto sync_out; } break; + case F2FS_GET_BLOCK_DIO: + if (map->m_next_pgofs) + *map->m_next_pgofs = pgofs + 1; + break; default: /* for defragment case */ if (map->m_next_pgofs) @@ -1718,19 +1666,16 @@ next_block: /* reserved delalloc block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) map->m_flags |= F2FS_MAP_DELALLOC; - map->m_flags |= F2FS_MAP_MAPPED; + /* DIO READ and hole case, should not map the blocks. */ + if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create)) + map->m_flags |= F2FS_MAP_MAPPED; map->m_pblk = blkaddr; map->m_len = 1; if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; - } else if ((map->m_pblk != NEW_ADDR && - blkaddr == (map->m_pblk + ofs)) || - (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || - flag == F2FS_GET_BLOCK_PRE_DIO) { - if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) - goto sync_out; + } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { @@ -1758,6 +1703,14 @@ skip: dn.ofs_in_node = end_offset; } + if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create) { + /* the next block to be allocated may not be contiguous. */ + if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == + CAP_BLKS_PER_SEC(sbi) - 1) + goto sync_out; + } + if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -1856,16 +1809,6 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) return true; } -static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) -{ - return (bytes >> inode->i_blkbits); -} - -static inline u64 blks_to_bytes(struct inode *inode, u64 blks) -{ - return (blks << inode->i_blkbits); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { @@ -1891,7 +1834,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); @@ -1923,7 +1866,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); len = inode->i_sb->s_blocksize; f2fs_put_page(page, 1); @@ -1939,30 +1882,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, return (err < 0 ? err : 0); } -static loff_t max_inode_blocks(struct inode *inode) -{ - loff_t result = ADDRS_PER_INODE(inode); - loff_t leaf_count = ADDRS_PER_BLOCK(inode); - - /* two direct node blocks */ - result += (leaf_count * 2); - - /* two indirect node blocks */ - leaf_count *= NIDS_PER_BLOCK; - result += (leaf_count * 2); - - /* one double indirect node block */ - leaf_count *= NIDS_PER_BLOCK; - result += leaf_count; - - return result; -} - int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { struct f2fs_map_blocks map; - sector_t start_blk, last_blk; + sector_t start_blk, last_blk, blk_len, max_len; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; @@ -1984,7 +1908,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; @@ -2004,16 +1928,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (bytes_to_blks(inode, len) == 0) - len = blks_to_bytes(inode, 1); - - start_blk = bytes_to_blks(inode, start); - last_blk = bytes_to_blks(inode, start + len - 1); + start_blk = F2FS_BYTES_TO_BLK(start); + last_blk = F2FS_BYTES_TO_BLK(start + len - 1); + blk_len = last_blk - start_blk + 1; + max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk; next: memset(&map, 0, sizeof(map)); map.m_lblk = start_blk; - map.m_len = bytes_to_blks(inode, len); + map.m_len = blk_len; map.m_next_pgofs = &next_pgofs; map.m_seg_type = NO_CHECK_TYPE; @@ -2030,13 +1953,23 @@ next: if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; - if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, - max_inode_blocks(inode))) + if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes) goto prep_next; flags |= FIEMAP_EXTENT_LAST; } + /* + * current extent may cross boundary of inquiry, increase len to + * requery. + */ + if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) && + map.m_lblk + map.m_len - 1 == last_blk && + blk_len != max_len) { + blk_len = max_len; + goto next; + } + compr_appended = false; /* In a case of compressed cluster, append this to the last extent */ if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || @@ -2068,14 +2001,14 @@ skip_fill: } else if (compr_appended) { unsigned int appended_blks = cluster_size - count_in_cluster + 1; - size += blks_to_bytes(inode, appended_blks); + size += F2FS_BLK_TO_BYTES(appended_blks); start_blk += appended_blks; compr_cluster = false; } else { - logical = blks_to_bytes(inode, start_blk); + logical = F2FS_BLK_TO_BYTES(start_blk); phys = __is_valid_data_blkaddr(map.m_pblk) ? - blks_to_bytes(inode, map.m_pblk) : 0; - size = blks_to_bytes(inode, map.m_len); + F2FS_BLK_TO_BYTES(map.m_pblk) : 0; + size = F2FS_BLK_TO_BYTES(map.m_len); flags = 0; if (compr_cluster) { @@ -2083,13 +2016,13 @@ skip_fill: count_in_cluster += map.m_len; if (count_in_cluster == cluster_size) { compr_cluster = false; - size += blks_to_bytes(inode, 1); + size += F2FS_BLKSIZE; } } else if (map.m_flags & F2FS_MAP_DELALLOC) { flags = FIEMAP_EXTENT_UNWRITTEN; } - start_blk += bytes_to_blks(inode, size); + start_blk += F2FS_BYTES_TO_BLK(size); } prep_next: @@ -2109,30 +2042,36 @@ out: static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) - return inode->i_sb->s_maxbytes; + return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } -static int f2fs_read_single_page(struct inode *inode, struct page *page, +static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) +{ + return rac ? REQ_RAHEAD : 0; +} + +static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, sector_t *last_block_in_bio, - bool is_readahead) + struct readahead_control *rac) { struct bio *bio = *bio_ret; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t block_nr; + pgoff_t index = folio_index(folio); int ret = 0; - block_in_file = (sector_t)page_index(page); + block_in_file = (sector_t)index; last_block = block_in_file + nr_pages; - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -2160,26 +2099,24 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, got_it: if ((map->m_flags & F2FS_MAP_MAPPED)) { block_nr = map->m_pblk + block_in_file - map->m_lblk; - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto out; } } else { zero_out: - zero_user_segment(page, 0, PAGE_SIZE); - if (f2fs_need_verity(inode, page->index) && - !fsverity_verify_page(page)) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (f2fs_need_verity(inode, index) && + !fsverity_verify_folio(folio)) { ret = -EIO; goto out; } - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_unlock(folio); goto out; } @@ -2189,14 +2126,14 @@ zero_out: */ if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, *last_block_in_bio, block_nr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index, + f2fs_ra_op_flags(rac), index, false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2211,7 +2148,7 @@ submit_and_realloc: */ f2fs_wait_on_block_writeback(inode, block_nr); - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); @@ -2226,7 +2163,7 @@ out: #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write) + struct readahead_control *rac, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2234,7 +2171,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; struct decompress_io_ctx *dic = NULL; struct extent_info ei = {}; bool from_dnode = true; @@ -2243,25 +2180,28 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; + struct folio *folio; if (!page) continue; - if ((sector_t)page->index >= last_block_in_file) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - } else if (!PageUptodate(page)) { + + folio = page_folio(page); + if ((sector_t)folio->index >= last_block_in_file) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + } else if (!folio_test_uptodate(folio)) { continue; } - unlock_page(page); + folio_unlock(folio); if (for_write) - put_page(page); + folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } @@ -2321,7 +2261,7 @@ skip_reading_dnode: } for (i = 0; i < cc->nr_cpages; i++) { - struct page *page = dic->cpages[i]; + struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; @@ -2331,7 +2271,8 @@ skip_reading_dnode: f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, page, blkaddr)) { + if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), + blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; @@ -2341,7 +2282,7 @@ skip_reading_dnode: if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; @@ -2349,8 +2290,8 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, - page->index, for_write); + f2fs_ra_op_flags(rac), + folio->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); f2fs_decompress_end_io(dic, ret, true); @@ -2360,7 +2301,7 @@ submit_and_realloc: } } - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); @@ -2398,7 +2339,7 @@ out: * Major change was from block_size == page_size in f2fs by default. */ static int f2fs_mpage_readpages(struct inode *inode, - struct readahead_control *rac, struct page *page) + struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -2415,6 +2356,7 @@ static int f2fs_mpage_readpages(struct inode *inode, .nr_cpages = 0, }; pgoff_t nc_cluster_idx = NULL_CLUSTER; + pgoff_t index; #endif unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; @@ -2431,64 +2373,63 @@ static int f2fs_mpage_readpages(struct inode *inode, for (; nr_pages; nr_pages--) { if (rac) { - page = readahead_page(rac); - prefetchw(&page->flags); + folio = readahead_folio(rac); + prefetchw(&folio->flags); } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_compressed_file(inode)) { - /* there are remained compressed pages, submit them */ - if (!f2fs_cluster_can_merge_page(&cc, page->index)) { - ret = f2fs_read_multi_pages(&cc, &bio, - max_nr_pages, - &last_block_in_bio, - rac != NULL, false); - f2fs_destroy_compress_ctx(&cc, false); - if (ret) - goto set_error_page; - } - if (cc.cluster_idx == NULL_CLUSTER) { - if (nc_cluster_idx == - page->index >> cc.log_cluster_size) { - goto read_single_page; - } - - ret = f2fs_is_compressed_cluster(inode, page->index); - if (ret < 0) - goto set_error_page; - else if (!ret) { - nc_cluster_idx = - page->index >> cc.log_cluster_size; - goto read_single_page; - } - - nc_cluster_idx = NULL_CLUSTER; - } - ret = f2fs_init_compress_ctx(&cc); + index = folio_index(folio); + + if (!f2fs_compressed_file(inode)) + goto read_single_page; + + /* there are remained compressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + rac, false); + f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; + } + if (cc.cluster_idx == NULL_CLUSTER) { + if (nc_cluster_idx == index >> cc.log_cluster_size) + goto read_single_page; - f2fs_compress_ctx_add_page(&cc, page); + ret = f2fs_is_compressed_cluster(inode, index); + if (ret < 0) + goto set_error_page; + else if (!ret) { + nc_cluster_idx = + index >> cc.log_cluster_size; + goto read_single_page; + } - goto next_page; + nc_cluster_idx = NULL_CLUSTER; } + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, folio); + + goto next_page; read_single_page: #endif - ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map, &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: #endif - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_unlock(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION next_page: #endif - if (rac) - put_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { @@ -2497,7 +2438,7 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); } } @@ -2510,22 +2451,21 @@ next_page: static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio->mapping->host; int ret = -EAGAIN; - trace_f2fs_readpage(page, DATA); + trace_f2fs_readpage(folio, DATA); if (!f2fs_is_compress_backend_ready(inode)) { - unlock_page(page); + folio_unlock(folio); return -EOPNOTSUPP; } /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) - ret = f2fs_read_inline_data(inode, page); + ret = f2fs_read_inline_data(inode, folio); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(inode, NULL, page); + ret = f2fs_mpage_readpages(inode, NULL, folio); return ret; } @@ -2650,7 +2590,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && @@ -2668,8 +2608,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (fio) { if (page_private_gcing(fio->page)) return true; - if (page_private_dummy(fio->page)) - return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) return true; @@ -2689,28 +2627,28 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct page *page = fio->page; - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(fio->page); + struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + page_private_atomic(folio_page(folio, 0)); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_read_extent_cache_block(inode, page->index, + f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(fio->sbi, - ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE)) return -EFSCORRUPTED; - } ipu_force = true; fio->need_lock = LOCK_DONE; @@ -2721,7 +2659,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; @@ -2729,8 +2667,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { - ClearPageUptodate(page); - clear_page_private_gcing(page); + folio_clear_uptodate(folio); + clear_page_private_gcing(folio_page(folio, 0)); goto out_writepage; } got_it: @@ -2738,12 +2676,11 @@ got_it: !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); goto out_writepage; } /* wait for GCed page writeback via META_MAPPING */ - if (fio->post_read) + if (fio->meta_gc) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); /* @@ -2757,7 +2694,7 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -2765,12 +2702,11 @@ got_it: if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - if (PageWriteback(page)) - end_page_writeback(page); + folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(fio->page, IPU); + trace_f2fs_do_write_data_page(folio, IPU); return err; } @@ -2792,15 +2728,17 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); + trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); + if (atomic_commit) + clear_page_private_atomic(folio_page(folio, 0)); out_writepage: f2fs_put_dnode(&dn); out: @@ -2809,7 +2747,7 @@ out: return err; } -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, @@ -2817,12 +2755,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, int compr_blocks, bool allow_balance) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; + struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); @@ -2838,19 +2777,19 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, - .post_read = f2fs_post_read_required(inode) ? 1 : 0, + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, + .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, .last_block = last_block, }; - trace_f2fs_writepage(page, DATA); + trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); + mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages for keeping lastest * directory structure. @@ -2868,7 +2807,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || + if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; @@ -2878,10 +2817,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) + if ((folio->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { @@ -2911,7 +2850,7 @@ write: err = -EAGAIN; if (f2fs_has_inline_data(inode)) { - err = f2fs_write_inline_data(inode, page); + err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } @@ -2919,6 +2858,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } @@ -2940,7 +2880,7 @@ done: out: inode_dec_dirty_pages(inode); if (err) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); clear_page_private_gcing(page); } @@ -2950,7 +2890,7 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); @@ -2968,7 +2908,7 @@ out: return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). @@ -2977,29 +2917,30 @@ redirty_out: */ if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); + folio_unlock(folio); return err; } static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); #ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) goto out; if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, page->index)) { - redirty_page_for_writepage(wbc, page); + if (f2fs_is_compressed_cluster(inode, folio->index)) { + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } } out: #endif - return f2fs_write_single_data_page(page, NULL, NULL, NULL, + return f2fs_write_single_data_page(folio, NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } @@ -3205,11 +3146,12 @@ continue_unlock: #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif - ret = f2fs_write_single_data_page(&folio->page, + submitted = 0; + ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) @@ -3417,11 +3359,11 @@ void f2fs_write_failed(struct inode *inode, loff_t to) } static int prepare_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { - struct inode *inode = page->mapping->host; - pgoff_t index = page->index; + struct inode *inode = folio->mapping->host; + pgoff_t index = folio->index; struct dnode_of_data dn; struct page *ipage; bool locked = false; @@ -3458,19 +3400,24 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(folio, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); goto out; } - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) { + err = -ENODATA; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; @@ -3557,12 +3504,12 @@ unlock_out: } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned int len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; - pgoff_t index = page->index; + pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; @@ -3600,12 +3547,12 @@ reserve_block: } static int f2fs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, void **fsdata) + loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page = NULL; - pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; + struct folio *folio; + pgoff_t index = pos >> PAGE_SHIFT; bool need_balance = false; bool use_cow = false; block_t blkaddr = NULL_ADDR; @@ -3621,7 +3568,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: - * lock_page(page #0) -> lock_page(inode_page) + * folio_lock(folio #0) -> folio_lock(inode_page) */ if (index != 0) { err = f2fs_convert_inline_inode(inode); @@ -3632,18 +3579,20 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { int ret; + struct page *page; *fsdata = NULL; if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; - ret = f2fs_prepare_compress_overwrite(inode, pagep, + ret = f2fs_prepare_compress_overwrite(inode, &page, index, fsdata); if (ret < 0) { err = ret; goto fail; } else if (ret) { + *foliop = page_folio(page); return 0; } } @@ -3651,82 +3600,85 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, repeat: /* - * Do not use grab_cache_page_write_begin() to avoid deadlock due to - * wait_for_stable_page. Will wait that below with our IO control. + * Do not use FGP_STABLE to avoid deadlock. + * Will wait that below with our IO control. */ - page = f2fs_pagecache_get_page(mapping, index, + folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); - if (!page) { - err = -ENOMEM; + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } /* TODO: cluster can be compressed due to race with .writepage */ - *pagep = page; + *foliop = folio; if (f2fs_is_atomic_file(inode)) - err = prepare_atomic_write_begin(sbi, page, pos, len, + err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else - err = prepare_write_begin(sbi, page, pos, len, + err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) - goto fail; + goto put_folio; if (need_balance && !IS_NOQUOTA(inode) && has_not_enough_free_secs(sbi, 0, 0)) { - unlock_page(page); + folio_unlock(folio); f2fs_balance_fs(sbi, true); - lock_page(page); - if (page->mapping != mapping) { - /* The page got truncated from under us */ - f2fs_put_page(page, 1); + folio_lock(folio); + if (folio->mapping != mapping) { + /* The folio got truncated from under us */ + folio_unlock(folio); + folio_put(folio); goto repeat; } } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_wait_on_page_writeback(&folio->page, DATA, false, true); - if (len == PAGE_SIZE || PageUptodate(page)) + if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { - zero_user_segment(page, len, PAGE_SIZE); + folio_zero_segment(folio, len, folio_size(folio)); return 0; } if (blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); - goto fail; + goto put_folio; } err = f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, page, - blkaddr, 0, true); + F2FS_I(inode)->cow_inode : inode, + folio, blkaddr, 0, true); if (err) - goto fail; + goto put_folio; - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + folio_lock(folio); + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); goto repeat; } - if (unlikely(!PageUptodate(page))) { + if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; - goto fail; + goto put_folio; } } return 0; +put_folio: + folio_unlock(folio); + folio_put(folio); fail: - f2fs_put_page(page, 1); f2fs_write_failed(inode, pos + len); return err; } @@ -3734,9 +3686,9 @@ fail: static int f2fs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; trace_f2fs_write_end(inode, pos, len, copied); @@ -3745,17 +3697,17 @@ static int f2fs_write_end(struct file *file, * should be PAGE_SIZE. Otherwise, we treat it with zero copied and * let generic_perform_write() try to copy data again through copied=0. */ - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { if (unlikely(copied != len)) copied = 0; else - SetPageUptodate(page); + folio_mark_uptodate(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION /* overwrite compressed file */ if (f2fs_compressed_file(inode) && fsdata) { - f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_compress_write_end(inode, fsdata, folio->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (pos + copied > i_size_read(inode) && @@ -3768,7 +3720,10 @@ static int f2fs_write_end(struct file *file, if (!copied) goto unlock_out; - set_page_dirty(page); + folio_mark_dirty(folio); + + if (f2fs_is_atomic_file(inode)) + set_page_private_atomic(folio_page(folio, 0)); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { @@ -3778,7 +3733,8 @@ static int f2fs_write_end(struct file *file, pos + copied); } unlock_out: - f2fs_put_page(page, 1); + folio_unlock(folio); + folio_put(folio); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } @@ -3820,7 +3776,7 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping, { struct inode *inode = mapping->host; - trace_f2fs_set_page_dirty(&folio->page, DATA); + trace_f2fs_set_page_dirty(folio, DATA); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -3905,26 +3861,36 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); + unsigned int end_blk = start_blk + blkcnt - 1; unsigned int secidx = start_blk / blk_per_sec; - unsigned int end_sec = secidx + blkcnt / blk_per_sec; + unsigned int end_sec; int ret = 0; + if (!blkcnt) + return 0; + end_sec = end_blk / blk_per_sec; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); set_inode_flag(inode, FI_OPU_WRITE); - for (; secidx < end_sec; secidx++) { + for (; secidx <= end_sec; secidx++) { + unsigned int blkofs_end = secidx == end_sec ? + end_blk % blk_per_sec : blk_per_sec - 1; + f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + ret = f2fs_allocate_pinning_section(sbi); + if (ret) { + f2fs_up_write(&sbi->pin_sem); + break; + } set_inode_flag(inode, FI_SKIP_WRITES); - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { struct page *page; unsigned int blkidx = secidx * blk_per_sec + blkofs; @@ -3966,15 +3932,14 @@ static int check_swap_activate(struct swap_info_struct *sis, struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - sector_t cur_lblock; - sector_t last_lblock; - sector_t pblock; - sector_t lowest_pblock = -1; - sector_t highest_pblock = 0; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + block_t lowest_pblock = -1; + block_t highest_pblock = 0; int nr_extents = 0; - unsigned long nr_pblocks; + unsigned int nr_pblocks; unsigned int blks_per_sec = BLKS_PER_SEC(sbi); - unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; unsigned int not_aligned = 0; int ret = 0; @@ -3983,7 +3948,7 @@ static int check_swap_activate(struct swap_info_struct *sis, * to be very smart. */ cur_lblock = 0; - last_lblock = bytes_to_blks(inode, i_size_read(inode)); + last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode)); while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; @@ -4012,28 +3977,35 @@ retry: pblock = map.m_pblk; nr_pblocks = map.m_len; - if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask) { + if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || + nr_pblocks % blks_per_sec || + !f2fs_valid_pinned_area(sbi, pblock)) { + bool last_extent = false; + not_aligned++; nr_pblocks = roundup(nr_pblocks, blks_per_sec); if (cur_lblock + nr_pblocks > sis->max) nr_pblocks -= blks_per_sec; + /* this extent is last one */ if (!nr_pblocks) { - /* this extent is last one */ - nr_pblocks = map.m_len; - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); - goto next; + nr_pblocks = last_lblock - cur_lblock; + last_extent = true; } ret = f2fs_migrate_blocks(inode, cur_lblock, nr_pblocks); - if (ret) + if (ret) { + if (ret == -ENOENT) + ret = -EINVAL; goto out; - goto retry; + } + + if (!last_extent) + goto retry; } -next: + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -4059,7 +4031,6 @@ next: cur_lblock = 1; /* force Empty message */ sis->max = cur_lblock; sis->pages = cur_lblock - 1; - sis->highest_bit = cur_lblock - 1; out: if (not_aligned) f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)", @@ -4071,17 +4042,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { - f2fs_err(F2FS_I_SB(inode), - "Swapfile not supported in LFS mode"); + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Swapfile not supported in LFS mode"); return -EINVAL; } @@ -4092,6 +4063,10 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (!f2fs_disable_compressed_file(inode)) return -EINVAL; + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + return ret; + f2fs_precache_extents(inode); ret = check_swap_activate(sis, file, span); @@ -4100,7 +4075,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return ret; } @@ -4139,13 +4114,13 @@ const struct address_space_operations f2fs_dblock_aops = { .swap_deactivate = f2fs_swap_deactivate, }; -void f2fs_clear_page_cache_dirty_tag(struct page *page) +void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio->mapping; unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - __xa_clear_mark(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irqrestore(&mapping->i_pages, flags); } @@ -4215,10 +4190,11 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, pgoff_t next_pgofs = 0; int err; - map.m_lblk = bytes_to_blks(inode, offset); - map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; + map.m_lblk = F2FS_BYTES_TO_BLK(offset); + map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); if (flags & IOMAP_WRITE) map.m_may_create = true; @@ -4226,7 +4202,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (err) return err; - iomap->offset = blks_to_bytes(inode, map.m_lblk); + iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk); /* * When inline encryption is enabled, sometimes I/O to an encrypted file @@ -4239,23 +4215,32 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, * We should never see delalloc or compressed extents here based on * prior flushing and checks. */ - if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) - return -EINVAL; if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR)) return -EINVAL; - if (map.m_pblk != NULL_ADDR) { - iomap->length = blks_to_bytes(inode, map.m_len); + if (map.m_flags & F2FS_MAP_MAPPED) { + if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) + return -EINVAL; + + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; - iomap->addr = blks_to_bytes(inode, map.m_pblk); + iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); } else { if (flags & IOMAP_WRITE) return -ENOTBLK; - iomap->length = blks_to_bytes(inode, next_pgofs) - - iomap->offset; - iomap->type = IOMAP_HOLE; + + if (map.m_pblk == NULL_ADDR) { + iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) - + iomap->offset; + iomap->type = IOMAP_HOLE; + } else if (map.m_pblk == NEW_ADDR) { + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); + iomap->type = IOMAP_UNWRITTEN; + } else { + f2fs_bug_on(F2FS_I_SB(inode), 1); + } iomap->addr = IOMAP_NULL_ADDR; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fdbf994f1271..468828288a4a 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -41,7 +41,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) total_vblocks = 0; blks_per_sec = CAP_BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -60,6 +60,70 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) } #ifdef CONFIG_DEBUG_FS +static void update_multidevice_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_dev_stats *dev_stats = si->dev_stats; + int i, j; + + if (!f2fs_is_multi_device(sbi)) + return; + + memset(dev_stats, 0, sizeof(struct f2fs_dev_stats) * sbi->s_ndevs); + for (i = 0; i < sbi->s_ndevs; i++) { + unsigned int start_segno, end_segno; + block_t start_blk, end_blk; + + if (i == 0) { + start_blk = MAIN_BLKADDR(sbi); + end_blk = FDEV(i).end_blk + 1 - SEG0_BLKADDR(sbi); + } else { + start_blk = FDEV(i).start_blk; + end_blk = FDEV(i).end_blk + 1; + } + + start_segno = GET_SEGNO(sbi, start_blk); + end_segno = GET_SEGNO(sbi, end_blk); + + for (j = start_segno; j < end_segno; j++) { + unsigned int seg_blks, sec_blks; + + seg_blks = get_seg_entry(sbi, j)->valid_blocks; + + /* update segment stats */ + if (IS_CURSEG(sbi, j)) + dev_stats[i].devstats[0][DEVSTAT_INUSE]++; + else if (seg_blks == BLKS_PER_SEG(sbi)) + dev_stats[i].devstats[0][DEVSTAT_FULL]++; + else if (seg_blks != 0) + dev_stats[i].devstats[0][DEVSTAT_DIRTY]++; + else if (!test_bit(j, FREE_I(sbi)->free_segmap)) + dev_stats[i].devstats[0][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[0][DEVSTAT_PREFREE]++; + + if (!__is_large_section(sbi) || + (j % SEGS_PER_SEC(sbi)) != 0) + continue; + + sec_blks = get_sec_entry(sbi, j)->valid_blocks; + + /* update section stats */ + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j))) + dev_stats[i].devstats[1][DEVSTAT_INUSE]++; + else if (sec_blks == BLKS_PER_SEC(sbi)) + dev_stats[i].devstats[1][DEVSTAT_FULL]++; + else if (sec_blks != 0) + dev_stats[i].devstats[1][DEVSTAT_DIRTY]++; + else if (!test_bit(GET_SEC_FROM_SEG(sbi, j), + FREE_I(sbi)->free_secmap)) + dev_stats[i].devstats[1][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[1][DEVSTAT_PREFREE]++; + } + } +} + static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -135,7 +199,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->cur_ckpt_time = sbi->cprc_info.cur_time; si->peak_ckpt_time = sbi->cprc_info.peak_time; spin_unlock(&sbi->cprc_info.stat_lock); - si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count); si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); @@ -176,11 +240,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; - si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; - si->util_valid = (int)(written_block_count(sbi) >> - sbi->log_blocks_per_seg) + si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; si->util_invalid = 50 - si->util_free - si->util_valid; @@ -208,13 +271,15 @@ static void update_general_status(struct f2fs_sb_info *sbi) if (!blks) continue; - if (blks == sbi->blocks_per_seg) + if (blks == BLKS_PER_SEG(sbi)) si->full_seg[type]++; else si->dirty_seg[type]++; si->valid_blks[type] += blks; } + update_multidevice_stats(sbi); + for (i = 0; i < MAX_CALL_TYPE; i++) si->cp_call_count[i] = atomic_read(&sbi->cp_call_count[i]); @@ -276,7 +341,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks); si->base_mem += NM_I(sbi)->nat_blocks * f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; @@ -499,6 +564,36 @@ static int stat_show(struct seq_file *s, void *v) si->dirty_count); seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); + if (f2fs_is_multi_device(sbi)) { + seq_puts(s, "Multidevice stats:\n"); + seq_printf(s, " [seg: %8s %8s %8s %8s %8s]", + "inuse", "dirty", "full", "free", "prefree"); + if (__is_large_section(sbi)) + seq_printf(s, " [sec: %8s %8s %8s %8s %8s]\n", + "inuse", "dirty", "full", "free", "prefree"); + else + seq_puts(s, "\n"); + + for (i = 0; i < sbi->s_ndevs; i++) { + seq_printf(s, " #%-2d %8u %8u %8u %8u %8u", i, + si->dev_stats[i].devstats[0][DEVSTAT_INUSE], + si->dev_stats[i].devstats[0][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[0][DEVSTAT_FULL], + si->dev_stats[i].devstats[0][DEVSTAT_FREE], + si->dev_stats[i].devstats[0][DEVSTAT_PREFREE]); + if (!__is_large_section(sbi)) { + seq_puts(s, "\n"); + continue; + } + seq_printf(s, " %8u %8u %8u %8u %8u\n", + si->dev_stats[i].devstats[1][DEVSTAT_INUSE], + si->dev_stats[i].devstats[1][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[1][DEVSTAT_FULL], + si->dev_stats[i].devstats[1][DEVSTAT_FREE], + si->dev_stats[i].devstats[1][DEVSTAT_PREFREE]); + } + seq_puts(s, "\n"); + } seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_call_count[TOTAL_CALL], si->cp_call_count[BACKGROUND]); @@ -599,9 +694,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); - seq_printf(s, " - datas: %4d in files:%4d\n", + seq_printf(s, " - data: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + seq_printf(s, " - quota data: %4d in quota files:%4d\n", si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); @@ -666,6 +761,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + struct f2fs_dev_stats *dev_stats; unsigned long flags; int i; @@ -673,6 +769,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) if (!si) return -ENOMEM; + dev_stats = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_stats) * + sbi->s_ndevs, GFP_KERNEL); + if (!dev_stats) { + kfree(si); + return -ENOMEM; + } + + si->dev_stats = dev_stats; + si->all_area_segs = le32_to_cpu(raw_super->segment_count); si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); @@ -725,6 +830,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + kfree(si->dev_stats); kfree(si); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 042593aed1ec..54dd52de7269 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> @@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } +#if IS_ENABLED(CONFIG_UNICODE) /* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { -#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = dir->i_sb; + unsigned char *buf; + int len; if (IS_CASEFOLDED(dir) && !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { - fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, - GFP_NOFS, false, F2FS_SB(sb)); - if (!fname->cf_name.name) + buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, + GFP_NOFS, false, F2FS_SB(sb)); + if (!buf) return -ENOMEM; - fname->cf_name.len = utf8_casefold(sb->s_encoding, - fname->usr_fname, - fname->cf_name.name, - F2FS_NAME_LEN); - if ((int)fname->cf_name.len <= 0) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; + + len = utf8_casefold(sb->s_encoding, fname->usr_fname, + buf, F2FS_NAME_LEN); + if (len <= 0) { + kmem_cache_free(f2fs_cf_name_slab, buf); if (sb_has_strict_encoding(sb)) return -EINVAL; /* fall back to treating name as opaque byte sequence */ + return 0; } + fname->cf_name.name = buf; + fname->cf_name.len = len; } -#endif + return 0; } +void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ + unsigned char *buf = (unsigned char *)fname->cf_name.name; + + if (buf) { + kmem_cache_free(f2fs_cf_name_slab, buf); + fname->cf_name.name = NULL; + } +} +#endif /* CONFIG_UNICODE */ + static int __f2fs_setup_filename(const struct inode *dir, const struct fscrypt_name *crypt_name, struct f2fs_filename *fname) @@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname) kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif -#if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; - } -#endif + f2fs_free_casefolded_name(fname); } static unsigned long dir_block_index(unsigned int level, @@ -157,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } @@ -165,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct page *dentry_page, const struct f2fs_filename *fname, - int *max_slots) + int *max_slots, + bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; @@ -173,60 +184,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots); -} - -#if IS_ENABLED(CONFIG_UNICODE) -/* - * Test whether a case-insensitive directory entry matches the filename - * being searched for. - * - * Returns 1 for a match, 0 for no match, and -errno on an error. - */ -static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, - const u8 *de_name, u32 de_name_len) -{ - const struct super_block *sb = dir->i_sb; - const struct unicode_map *um = sb->s_encoding; - struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); - struct qstr entry = QSTR_INIT(de_name, de_name_len); - int res; - - if (IS_ENCRYPTED(dir)) { - const struct fscrypt_str encrypted_name = - FSTR_INIT((u8 *)de_name, de_name_len); - - if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir))) - return -EINVAL; - - decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); - if (!decrypted_name.name) - return -ENOMEM; - res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name, - &decrypted_name); - if (res < 0) - goto out; - entry.name = decrypted_name.name; - entry.len = decrypted_name.len; - } - - res = utf8_strncasecmp_folded(um, name, &entry); - /* - * In strict mode, ignore invalid names. In non-strict mode, - * fall back to treating them as opaque byte sequences. - */ - if (res < 0 && !sb_has_strict_encoding(sb)) { - res = name->len == entry.len && - memcmp(name->name, entry.name, name->len) == 0; - } else { - /* utf8_strncasecmp_folded returns 0 on match */ - res = (res == 0); - } -out: - kfree(decrypted_name.name); - return res; + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } -#endif /* CONFIG_UNICODE */ static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, @@ -235,11 +194,11 @@ static inline int f2fs_match_name(const struct inode *dir, struct fscrypt_name f; #if IS_ENABLED(CONFIG_UNICODE) - if (fname->cf_name.name) { - struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); + if (fname->cf_name.name) + return generic_ci_match(dir, fname->usr_fname, + &fname->cf_name, + de_name, de_name_len); - return f2fs_match_ci_name(dir, &cf, de_name, de_name_len); - } #endif f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; @@ -250,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots) + const struct f2fs_filename *fname, int *max_slots, + bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -273,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash) { + if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); @@ -300,11 +260,12 @@ found: static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, - struct page **res_page) + struct page **res_page, + bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; - unsigned int bidx, end_block; + unsigned int bidx, end_block, bucket_no; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; @@ -314,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; + +start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(fname->hash) % nbucket); + bucket_no); end_block = bidx + nblock; while (bidx < end_block) { @@ -332,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_page, fname, &max_slots); + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); if (IS_ERR(de)) { *res_page = ERR_CAST(de); de = NULL; @@ -349,12 +313,18 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bidx++; } - if (!de && room && F2FS_I(dir)->chash != fname->hash) { - F2FS_I(dir)->chash = fname->hash; - F2FS_I(dir)->clevel = level; - } + if (de) + return de; - return de; + if (likely(use_hash)) { + if (room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; + F2FS_I(dir)->clevel = level; + } + } else if (++bucket_no < nbucket) { + goto start_find_bucket; + } + return NULL; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, @@ -365,11 +335,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; + bool use_hash = true; *res_page = NULL; +#if IS_ENABLED(CONFIG_UNICODE) +start_find_entry: +#endif if (f2fs_has_inline_dentry(dir)) { - de = f2fs_find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); goto out; } @@ -385,11 +359,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, fname, res_page); + de = find_in_level(dir, level, fname, res_page, use_hash); if (de || IS_ERR(*res_page)) break; } + out: +#if IS_ENABLED(CONFIG_UNICODE) + if (IS_CASEFOLDED(dir) && !de && use_hash) { + use_hash = false; + goto start_find_entry; + } +#endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; @@ -830,13 +811,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, return err; } -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname) { struct page *page; int err = 0; f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -883,6 +865,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + pgoff_t index = page_folio(page)->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -908,8 +891,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); if (bit_pos == NR_DENTRY_IN_BLOCK && - !f2fs_truncate_hole(dir, page->index, page->index + 1)) { - f2fs_clear_page_cache_dirty_tag(page); + !f2fs_truncate_hole(dir, index, index + 1)) { + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); ClearPageUptodate(page); clear_page_private_all(page); @@ -995,9 +978,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de = &d->dentry[bit_pos]; if (de->name_len == 0) { if (found_valid_dirent || !bit_pos) { - printk_ratelimited( - "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, sbi->sb->s_id, + f2fs_warn_ratelimited(sbi, + "invalid namelen(0), ino:%u, run fsck to fix.", le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ad8dfac73bd4..347b3b647834 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,37 +19,56 @@ #include "node.h" #include <trace/events/f2fs.h> -bool sanity_check_extent_cache(struct inode *inode) +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct extent_tree *et = fi->extent_tree[EX_READ]; - struct extent_info *ei; - - if (!et) - return true; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct extent_info ei; + int devi; - ei = &et->largest; - if (!ei->len) - return true; + get_read_extent_info(&ei, i_ext); - /* Let's drop, if checkpoint got corrupted. */ - if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) { - ei->len = 0; - et->largest_updated = true; + if (!ei.len) return true; - } - if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || - !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, DATA_GENERIC_ENHANCE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + ei.blk, ei.fofs, ei.len); return false; } - return true; + + if (!IS_DEVICE_ALIASING(inode)) + return true; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (FDEV(devi).start_blk != ei.blk || + FDEV(devi).end_blk != ei.blk + ei.len - 1) + continue; + + if (devi == 0) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) is an alias of meta device", + __func__, inode->i_ino); + return false; + } + + if (bdev_is_zoned(FDEV(devi).bdev)) { + f2fs_warn(sbi, + "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] maps to zoned block device", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; + } + return true; + } + + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] is inconsistent w/ any devices", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; } static void __set_extent_info(struct extent_info *ei, @@ -87,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) static bool __may_extent_tree(struct inode *inode, enum extent_type type) { + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) + return true; + /* * for recovered files during mount do not create extents * if shrinker is not registered. @@ -357,27 +379,28 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode, } static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et) + struct extent_tree *et, unsigned int nr_shrink) { struct rb_node *node, *next; struct extent_node *en; - unsigned int count = atomic_read(&et->node_cnt); + unsigned int count; node = rb_first_cached(&et->root); - while (node) { + + for (count = 0; node && count < nr_shrink; count++) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); __release_extent_node(sbi, et, en); node = next; } - return count - atomic_read(&et->node_cnt); + return count; } static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; @@ -395,24 +418,27 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (i_ext && i_ext->len) { + if (i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); } - goto out; + set_inode_flag(inode, FI_NO_EXTENT); + return; } et = __grab_extent_tree(inode, EX_READ); - if (!i_ext || !i_ext->len) - goto out; - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); - if (atomic_read(&et->node_cnt)) - goto unlock_out; + if (atomic_read(&et->node_cnt) || !ei.len) + goto skip; + + if (IS_DEVICE_ALIASING(inode)) { + et->largest = ei; + goto skip; + } en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -424,11 +450,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) list_add_tail(&en->list, &eti->extent_list); spin_unlock(&eti->extent_lock); } -unlock_out: +skip: + /* Let's drop, if checkpoint got corrupted. */ + if (f2fs_cp_error(sbi)) { + et->largest.len = 0; + et->largest_updated = true; + } write_unlock(&et->lock); -out: - if (!F2FS_I(inode)->extent_tree[EX_READ]) - set_inode_flag(inode, FI_NO_EXTENT); } void f2fs_init_age_extent_tree(struct inode *inode) @@ -467,13 +495,18 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); goto out; } + if (IS_DEVICE_ALIASING(inode)) { + ret = false; + goto out; + } + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; @@ -590,6 +623,30 @@ do_insert: return en; } +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; + unsigned int nr_shrink = type == EX_READ ? + READ_EXTENT_CACHE_SHRINK_NUMBER : + AGE_EXTENT_CACHE_SHRINK_NUMBER; + unsigned int node_cnt = 0; + + if (!et || !atomic_read(&et->node_cnt)) + return 0; + + while (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, nr_shrink); + write_unlock(&et->lock); + } + + f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + return node_cnt; +} + static void __update_extent_tree_range(struct inode *inode, struct extent_info *tei, enum extent_type type) { @@ -660,7 +717,9 @@ static void __update_extent_tree_range(struct inode *inode, } if (end < org_end && (type != EX_READ || - org_end - end >= F2FS_MIN_EXTENT_LEN)) { + (org_end - end >= F2FS_MIN_EXTENT_LEN && + atomic_read(&et->node_cnt) < + sbi->max_read_extent_count))) { if (parts) { __set_extent_info(&ei, end, org_end - end, @@ -728,9 +787,6 @@ static void __update_extent_tree_range(struct inode *inode, } } - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - __free_extent_tree(sbi, et); - if (et->largest_updated) { et->largest_updated = false; updated = true; @@ -748,6 +804,9 @@ update_age_extent_cache: out_read_extent_cache: write_unlock(&et->lock); + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + __destroy_extent_node(inode, EX_READ); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -856,10 +915,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, goto out; if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - f2fs_bug_on(sbi, 1); + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) return -EINVAL; - } out: /* * init block age with zero, this can happen when the block age extent @@ -912,10 +969,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et); + node_cnt += __free_extent_tree(sbi, et, + nr_shrink - node_cnt - tree_cnt); write_unlock(&et->lock); } - f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + if (atomic_read(&et->node_cnt)) + goto unlock_out; + list_del_init(&et->list); radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); @@ -1054,23 +1115,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); } -static unsigned int __destroy_extent_node(struct inode *inode, - enum extent_type type) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; - unsigned int node_cnt = 0; - - if (!et || !atomic_read(&et->node_cnt)) - return 0; - - write_lock(&et->lock); - node_cnt = __free_extent_tree(sbi, et); - write_unlock(&et->lock); - - return node_cnt; -} - void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); @@ -1079,7 +1123,6 @@ void f2fs_destroy_extent_node(struct inode *inode) static void __drop_extent_tree(struct inode *inode, enum extent_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; @@ -1087,7 +1130,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) return; write_lock(&et->lock); - __free_extent_tree(sbi, et); if (type == EX_READ) { set_inode_flag(inode, FI_NO_EXTENT); if (et->largest.len) { @@ -1096,6 +1138,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) } } write_unlock(&et->lock); + + __destroy_extent_node(inode, type); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -1169,6 +1214,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; sbi->last_age_weight = LAST_AGE_WEIGHT; + sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65294e3b0bef..1afa7be16e7d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -11,7 +11,6 @@ #include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> -#include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/magic.h> @@ -24,7 +23,7 @@ #include <linux/blkdev.h> #include <linux/quotaops.h> #include <linux/part_stat.h> -#include <crypto/hash.h> +#include <linux/rw_hint.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> @@ -60,7 +59,9 @@ enum { FAULT_SLAB_ALLOC, FAULT_DQUOT_INIT, FAULT_LOCK_OP, - FAULT_BLKADDR, + FAULT_BLKADDR_VALIDITY, + FAULT_BLKADDR_CONSISTENCE, + FAULT_NO_SEGMENT, FAULT_MAX, }; @@ -69,12 +70,17 @@ enum { struct f2fs_fault_info { atomic_t inject_ops; - unsigned int inject_rate; + int inject_rate; unsigned int inject_type; }; extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* @@ -126,6 +132,12 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -142,7 +154,6 @@ struct f2fs_rwsem { struct f2fs_mount_info { unsigned int opt; - int write_io_size_bits; /* Write IO size bits */ block_t root_reserved_blocks; /* root reserved blocks */ kuid_t s_resuid; /* reserved blocks for uid */ kgid_t s_resgid; /* reserved blocks for gid */ @@ -201,6 +212,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_CASEFOLD 0x00001000 #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -278,6 +290,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -524,7 +537,7 @@ struct f2fs_filename { * internal operation where usr_fname is also NULL. In all these cases * we fall back to treating the name as an opaque byte sequence. */ - struct fscrypt_str cf_name; + struct qstr cf_name; #endif }; @@ -621,6 +634,9 @@ enum { #define DEF_HOT_DATA_AGE_THRESHOLD 262144 #define DEF_WARM_DATA_AGE_THRESHOLD 2621440 +/* default max read extent count per inode */ +#define DEF_MAX_READ_EXTENT_COUNT 10240 + /* extent cache type */ enum extent_type { EX_READ, @@ -758,11 +774,6 @@ enum { #define DEF_DIR_LEVEL 0 -enum { - GC_FAILURE_PIN, - MAX_GC_FAILURE -}; - /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -782,7 +793,6 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -800,7 +810,9 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ + FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ }; @@ -809,9 +821,10 @@ struct f2fs_inode_info { unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* only for directory depth */ - /* for gc failure statistic */ - unsigned int i_gc_failures[MAX_GC_FAILURE]; + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned short i_gc_failures; /* for gc failure statistic */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ @@ -829,7 +842,7 @@ struct f2fs_inode_info { spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; + struct dquot __rcu *i_dquot[MAXQUOTAS]; /* quota space reservation, managed internally by quota code */ qsize_t i_reserved_quota; @@ -839,7 +852,11 @@ struct f2fs_inode_info { struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -1004,7 +1021,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, #define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) #define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE) -enum { +enum log_type { CURSEG_HOT_DATA = 0, /* directory entry blocks */ CURSEG_WARM_DATA, /* data blocks */ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ @@ -1049,7 +1066,6 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ - unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -1080,7 +1096,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(p, f) \ + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -1110,6 +1127,7 @@ enum count_type { * ... Only can be used with META. */ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) +#define PAGE_TYPE_ON_MAIN(type) ((type) == DATA || (type) == NODE) enum page_type { DATA = 0, NODE = 1, /* should not change this */ @@ -1145,6 +1163,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { @@ -1204,9 +1223,8 @@ struct f2fs_io_info { unsigned int submitted:1; /* indicate IO submission */ unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ - unsigned int retry:1; /* need to reallocate block address */ unsigned int encrypted:1; /* indicate file is encrypted */ - unsigned int post_read:1; /* require post read */ + unsigned int meta_gc:1; /* require meta inode GC */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -1239,7 +1257,7 @@ struct f2fs_bio_info { #define FDEV(i) (sbi->devs[i]) #define RDEV(i) (raw_super->devs[i]) struct f2fs_dev_info { - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; char path[MAX_PATH_LEN]; unsigned int total_segments; @@ -1284,6 +1302,7 @@ struct f2fs_gc_control { bool no_bg_gc; /* check the space and stop bg_gc */ bool should_migrate_blocks; /* should migrate blocks */ bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ unsigned int nr_free_secs; /* # of free sections to do GC */ }; @@ -1406,10 +1425,10 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * Layout A: lowest bit should be 1 * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | * bit 0 PAGE_PRIVATE_NOT_POINTER - * bit 1 PAGE_PRIVATE_DUMMY_WRITE - * bit 2 PAGE_PRIVATE_ONGOING_MIGRATION - * bit 3 PAGE_PRIVATE_INLINE_INODE - * bit 4 PAGE_PRIVATE_REF_RESOURCE + * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION + * bit 2 PAGE_PRIVATE_INLINE_INODE + * bit 3 PAGE_PRIVATE_REF_RESOURCE + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE * bit 5- f2fs private data * * Layout B: lowest bit should be 0 @@ -1417,10 +1436,10 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); */ enum { PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */ - PAGE_PRIVATE_DUMMY_WRITE, /* data page for padding aligned IO */ PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -1551,6 +1570,9 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ + unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ @@ -1564,7 +1586,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ struct f2fs_rwsem io_order_lock; - mempool_t *write_io_dummy; /* Dummy pages */ pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ @@ -1600,6 +1621,7 @@ struct f2fs_sb_info { /* for extent tree cache */ struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; atomic64_t allocated_data_blocks; /* for block age extent_cache */ + unsigned int max_read_extent_count; /* max read extent count per inode */ /* The threshold used for hot and warm data seperation*/ unsigned int hot_data_age_threshold; @@ -1671,13 +1693,15 @@ struct f2fs_sb_info { unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ - u64 gc_pin_file_threshold; + unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; /* migration granularity of garbage collection, unit: segment */ unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. @@ -1737,14 +1761,12 @@ struct f2fs_sb_info { unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ bool aligned_blksize; /* all devices has the same logical blksize */ + unsigned int first_zoned_segno; /* first zoned segno */ /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; @@ -1810,6 +1832,37 @@ struct f2fs_sb_info { #endif }; +/* Definitions to access f2fs_sb_info */ +#define SEGS_TO_BLKS(sbi, segs) \ + ((segs) << (sbi)->log_blocks_per_seg) +#define BLKS_TO_SEGS(sbi, blks) \ + ((blks) >> (sbi)->log_blocks_per_seg) + +#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg) +#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec)) +#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec) + +__printf(3, 4) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__) + +#define f2fs_err_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_info_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__) + #ifdef CONFIG_F2FS_FAULT_INJECTION #define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__, \ __builtin_return_address(0)) @@ -1827,9 +1880,8 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type, atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", - KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type], - func, parent_func); + f2fs_info_ratelimited(sbi, "inject %s in %s of %pS", + f2fs_fault_name[type], func, parent_func); return true; } return false; @@ -1892,21 +1944,7 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); - - desc.shash.tfm = sbi->s_chksum_driver; - *(u32 *)desc.ctx = crc; - - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; + return crc32(crc, address, length); } static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, @@ -1947,9 +1985,14 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } +static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +{ + return F2FS_M_SB(folio->mapping); +} + static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page_file_mapping(page)); + return F2FS_F_SB(page_folio(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1957,6 +2000,16 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % (1 << folio_order(folio)); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); @@ -2249,11 +2302,32 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; } +static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + block_t avail_user_block_count; + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; + + if (!__allow_reserved_blocks(sbi, inode, cap)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } + + return avail_user_block_count; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { - blkcnt_t diff = 0, release = 0; + long long diff = 0, release = 0; block_t avail_user_block_count; int ret; @@ -2273,35 +2347,27 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); spin_lock(&sbi->stat_lock); - sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks; - if (!__allow_reserved_blocks(sbi, inode, true)) - avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - - if (F2FS_IO_ALIGNED(sbi)) - avail_user_block_count -= sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; - - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - if (avail_user_block_count > sbi->unusable_block_count) - avail_user_block_count -= sbi->unusable_block_count; - else - avail_user_block_count = 0; - } - if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { - diff = sbi->total_valid_block_count - avail_user_block_count; + avail_user_block_count = get_available_block_count(sbi, inode, true); + diff = (long long)sbi->total_valid_block_count + *count - + avail_user_block_count; + if (unlikely(diff > 0)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + release = *count; + goto enospc; + } if (diff > *count) diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); goto enospc; } } + sbi->total_valid_block_count += (block_t)(*count); + spin_unlock(&sbi->stat_lock); if (unlikely(release)) { @@ -2318,20 +2384,6 @@ release_quota: return -ENOSPC; } -__printf(2, 3) -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); - -#define f2fs_err(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) -#define f2fs_warn(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) -#define f2fs_notice(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) -#define f2fs_info(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) -#define f2fs_debug(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) - #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ static inline bool page_private_##name(struct page *page) \ { \ @@ -2360,17 +2412,17 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); static inline unsigned long get_page_private_data(struct page *page) { @@ -2402,6 +2454,7 @@ static inline void clear_page_private_all(struct page *page) clear_page_private_reference(page); clear_page_private_gcing(page); clear_page_private_inline(page); + clear_page_private_atomic(page); f2fs_bug_on(F2FS_P_SB(page), page_private(page)); } @@ -2504,11 +2557,8 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; - unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >> - sbi->log_blocks_per_seg; - - return segs / sbi->segs_per_sec; + return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1, + BLKS_PER_SEC(sbi)); } static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) @@ -2572,7 +2622,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 2) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -2581,7 +2631,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 1) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -2600,7 +2650,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count, user_block_count; + unsigned int valid_node_count; + unsigned int avail_user_block_count; int err; if (is_inode) { @@ -2620,21 +2671,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + - sbi->current_reserved_blocks + 1; - - if (!__allow_reserved_blocks(sbi, inode, false)) - valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + valid_block_count = sbi->total_valid_block_count + 1; + avail_user_block_count = get_available_block_count(sbi, inode, false); - if (F2FS_IO_ALIGNED(sbi)) - valid_block_count += sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; - - user_block_count = sbi->user_block_count; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - user_block_count -= sbi->unusable_block_count; - - if (unlikely(valid_block_count > user_block_count)) { + if (unlikely(valid_block_count > avail_user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -2834,13 +2874,26 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) return false; } +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + if (sbi->gc_mode == GC_URGENT_HIGH) return true; - if (is_inflight_io(sbi, type)) - return false; + if (zoned_gc) { + if (is_inflight_read_io(sbi)) + return false; + } else { + if (is_inflight_io(sbi, type)) + return false; + } if (sbi->gc_mode == GC_URGENT_MID) return true; @@ -2849,6 +2902,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) (type == DISCARD_TIME || type == GC_TIME)) return true; + if (zoned_gc) + return true; + return f2fs_time_over(sbi, type); } @@ -2880,26 +2936,27 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) +static inline unsigned int get_dnode_base(struct inode *inode, + struct page *node_page) { - struct f2fs_node *raw_node; - __le32 *addr_array; - int base = 0; - bool is_inode = IS_INODE(node_page); + if (!IS_INODE(node_page)) + return 0; - raw_node = F2FS_NODE(node_page); + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_page)->i); +} - if (is_inode) { - if (!inode) - /* from GC path only */ - base = offset_in_addr(&raw_node->i); - else if (f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + return blkaddr_in_node(F2FS_NODE(node_page)) + + get_dnode_base(inode, node_page); +} - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[base + offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct page *node_page, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) @@ -2981,6 +3038,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) @@ -2996,6 +3054,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { if (S_ISDIR(mode)) @@ -3018,7 +3078,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: f2fs_mark_inode_dirty_sync(inode, true); @@ -3114,7 +3173,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) static inline void f2fs_i_gc_failures_write(struct inode *inode, unsigned int count) { - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; + F2FS_I(inode)->i_gc_failures = count; f2fs_mark_inode_dirty_sync(inode, true); } @@ -3142,8 +3201,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3164,8 +3221,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3206,21 +3261,15 @@ static inline bool f2fs_need_compress_data(struct inode *inode) return false; } -static inline unsigned int addrs_per_inode(struct inode *inode) +static inline unsigned int addrs_per_page(struct inode *inode, + bool is_inode) { - unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - - get_inline_xattr_addrs(inode); + unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK; - if (!f2fs_compressed_file(inode)) - return addrs; - return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); -} - -static inline unsigned int addrs_per_block(struct inode *inode) -{ - if (!f2fs_compressed_file(inode)) - return DEF_ADDRS_PER_BLOCK; - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); + if (f2fs_compressed_file(inode)) + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); + return addrs; } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -3252,11 +3301,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); @@ -3277,8 +3321,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { __le32 *addr = get_dnode_addr(inode, page); @@ -3364,17 +3406,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const u8 *name, size_t len) -{ - if (len == 1 && name[0] == '.') - return true; - - if (len == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -3428,17 +3459,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page) -{ - int base = 0; - - if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - - return blkaddr_in_node(F2FS_NODE(node_page)) + base; -} - #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -3455,7 +3475,7 @@ static inline __le32 *get_dnode_addr(struct inode *inode, sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ -#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) +#define __is_large_section(sbi) (SEGS_PER_SEC(sbi) > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) @@ -3464,11 +3484,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", blkaddr, type); - f2fs_bug_on(sbi, 1); - } } static inline bool __is_valid_data_blkaddr(block_t blkaddr) @@ -3492,6 +3510,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, @@ -3528,15 +3548,30 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, /* * dir.c */ +#if IS_ENABLED(CONFIG_UNICODE) int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname); +void f2fs_free_casefolded_name(struct f2fs_filename *fname); +#else +static inline int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ + return 0; +} + +static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname) +{ +} +#endif /* CONFIG_UNICODE */ + int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct f2fs_filename *fname); int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname); void f2fs_free_filename(struct f2fs_filename *fname); struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots); + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, @@ -3570,7 +3605,8 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode); -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname); bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) @@ -3592,8 +3628,7 @@ int f2fs_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context); +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); @@ -3671,7 +3706,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); @@ -3685,22 +3721,22 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); -void f2fs_get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir); -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type); void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); void f2fs_outplace_write_data(struct dnode_of_data *dn, @@ -3714,7 +3750,9 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type seg_type); +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); @@ -3730,17 +3768,19 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi); -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi); +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi); int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); void f2fs_destroy_segment_manager_caches(void); -int f2fs_rw_hint_to_seg_type(enum rw_hint hint); -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno); +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno); #define DEF_FRAGMENT_SIZE 4 #define MIN_FRAGMENT_SIZE 1 @@ -3764,6 +3804,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, @@ -3804,6 +3846,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, @@ -3843,7 +3886,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, @@ -3852,7 +3895,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); -void f2fs_clear_page_cache_dirty_tag(struct page *page); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); @@ -3867,13 +3910,16 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections); int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age); + unsigned long long age, bool one_time); /* * recovery.c @@ -3887,6 +3933,19 @@ void f2fs_destroy_recovery_cache(void); * debug.c */ #ifdef CONFIG_F2FS_STAT_FS +enum { + DEVSTAT_INUSE, + DEVSTAT_DIRTY, + DEVSTAT_FULL, + DEVSTAT_FREE, + DEVSTAT_PREFREE, + DEVSTAT_MAX, +}; + +struct f2fs_dev_stats { + unsigned int devstats[2][DEVSTAT_MAX]; /* 0: segs, 1: secs */ +}; + struct f2fs_stat_info { struct list_head stat_list; struct f2fs_sb_info *sbi; @@ -3950,6 +4009,7 @@ struct f2fs_stat_info { unsigned int block_count[2]; unsigned int inplace_count; unsigned long long base_mem, cache_mem, page_mem; + struct f2fs_dev_stats *dev_stats; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -3959,7 +4019,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_call_count(sbi, foreground) \ atomic_inc(&sbi->cp_call_count[(foreground)]) -#define stat_inc_cp_count(si) (F2FS_STAT(sbi)->cp_count++) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4135,20 +4195,21 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); bool f2fs_may_inline_dentry(struct inode *inode); -void f2fs_do_read_inline_data(struct page *page, struct page *ipage); +void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage); void f2fs_truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); -int f2fs_read_inline_data(struct inode *inode, struct page *page); +int f2fs_read_inline_data(struct inode *inode, struct folio *folio); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); -int f2fs_write_inline_data(struct inode *inode, struct page *page); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page); + struct page **res_page, + bool use_hash); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, @@ -4176,7 +4237,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode); +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); @@ -4247,10 +4308,25 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + +static inline bool f2fs_meta_inode_gc_required(struct inode *inode) +{ + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); +} + /* * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; bool f2fs_is_compressed_page(struct page *page); struct page *f2fs_compress_control_page(struct page *page); int f2fs_prepare_compress_overwrite(struct inode *inode, @@ -4271,23 +4347,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); int f2fs_write_multi_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write); + struct readahead_control *rac, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4298,7 +4376,8 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len); void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr); bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, @@ -4344,7 +4423,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } @@ -4352,8 +4432,8 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } -static inline void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, - block_t blkaddr) { } +static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { } static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr) { } static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, @@ -4361,6 +4441,12 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, @@ -4371,22 +4457,18 @@ static inline int set_compress_context(struct inode *inode) { #ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - F2FS_I(inode)->i_compress_algorithm = - F2FS_OPTION(sbi).compress_algorithm; - F2FS_I(inode)->i_log_cluster_size = - F2FS_OPTION(sbi).compress_log_size; - F2FS_I(inode)->i_compress_flag = - F2FS_OPTION(sbi).compress_chksum ? - BIT(COMPRESS_CHKSUM) : 0; - F2FS_I(inode)->i_cluster_size = - BIT(F2FS_I(inode)->i_log_cluster_size); - if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 || - F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) && + fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm; + fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? + BIT(COMPRESS_CHKSUM) : 0; + fi->i_cluster_size = BIT(fi->i_log_cluster_size); + if ((fi->i_compress_algorithm == COMPRESS_LZ4 || + fi->i_compress_algorithm == COMPRESS_ZSTD) && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = - F2FS_OPTION(sbi).compress_level; - F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); @@ -4401,15 +4483,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&fi->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&fi->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&fi->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&fi->i_sem); return true; } @@ -4433,6 +4524,7 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, @@ -4512,6 +4604,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (f2fs_sb_has_blkzoned(sbi)) { + int devi = f2fs_target_device_index(sbi, blkaddr); + + return !bdev_is_zoned(FDEV(devi).bdev); + } + return true; +} + static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) { return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; @@ -4563,10 +4666,14 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) } #ifdef CONFIG_F2FS_FAULT_INJECTION -extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type); +extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type); #else -#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) +static inline int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, + unsigned long rate, unsigned long type) +{ + return 0; +} #endif static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) @@ -4593,9 +4700,11 @@ static inline void f2fs_io_schedule_timeout(long timeout) io_schedule_timeout(timeout); } -static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, - enum page_type type) +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) { + pgoff_t ofs = folio->index; + if (unlikely(f2fs_cp_error(sbi))) return; @@ -4613,11 +4722,37 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); } +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int cnt) +{ + bool need_submit = false; + int i = 0; + + do { + struct page *page; + + page = find_get_page(META_MAPPING(sbi), blkaddr + i); + if (page) { + if (folio_test_writeback(page_folio(page))) + need_submit = true; + f2fs_put_page(page, 0); + } + } while (++i < cnt && !need_submit); + + if (need_submit) + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode, + NULL, 0, DATA); + + truncate_inode_pages_range(META_MAPPING(sbi), + F2FS_BLK_TO_BYTES((loff_t)blkaddr), + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1))); +} + static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, - block_t blkaddr) + block_t blkaddr, unsigned int len) { - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr); - f2fs_invalidate_compress_page(sbi, blkaddr); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); + f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); } #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b58ab1157b7e..f92a9fba9991 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -8,7 +8,6 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/stat.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/falloc.h> @@ -39,6 +38,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); + vm_flags_t flags = vmf->vma->vm_flags; vm_fault_t ret; ret = filemap_fault(vmf); @@ -46,18 +46,18 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_MAPPED_READ_IO, F2FS_BLKSIZE); - trace_f2fs_filemap_fault(inode, vmf->pgoff, vmf->vma->vm_flags, ret); + trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); return ret; } static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - bool need_alloc = true; + bool need_alloc = !f2fs_is_pinned_file(inode); int err = 0; vm_fault_t ret; @@ -85,7 +85,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - int ret = f2fs_is_compressed_cluster(inode, page->index); + int ret = f2fs_is_compressed_cluster(inode, folio->index); if (ret < 0) { err = ret; @@ -105,34 +105,33 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page))) { - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || + !folio_test_uptodate(folio))) { + folio_unlock(folio); err = -EFAULT; goto out_sem; } + set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_block_locked(&dn, page->index); - } - -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (!need_alloc) { - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_block_locked(&dn, folio->index); + } else { + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); f2fs_put_dnode(&dn); + if (f2fs_is_pinned_file(inode) && + !__is_valid_data_blkaddr(dn.data_blkaddr)) + err = -EIO; } -#endif + if (err) { - unlock_page(page); + folio_unlock(folio); goto out_sem; } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -140,18 +139,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto out_sem; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_SHIFT) > + if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { loff_t offset; offset = i_size_read(inode) & ~PAGE_MASK; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); } - set_page_dirty(page); + folio_mark_dirty(folio); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); @@ -163,7 +162,7 @@ out_sem: out: ret = vmf_fs_error(err); - trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret); + trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); return ret; } @@ -185,7 +184,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - *pino = parent_ino(dentry); + *pino = d_parent_ino(dentry); dput(dentry); return 1; } @@ -218,6 +217,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } @@ -373,8 +375,7 @@ sync_nodes: f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) || - (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi))) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); @@ -394,9 +395,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return f2fs_do_sync_file(file, start, end, datasync, false); } -static bool __found_offset(struct address_space *mapping, block_t blkaddr, - pgoff_t index, int whence) +static bool __found_offset(struct address_space *mapping, + struct dnode_of_data *dn, pgoff_t index, int whence) { + block_t blkaddr = f2fs_data_blkaddr(dn); + struct inode *inode = mapping->host; + bool compressed_cluster = false; + + if (f2fs_compressed_file(inode)) { + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, + ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); + + compressed_cluster = first_blkaddr == COMPRESS_ADDR; + } + switch (whence) { case SEEK_DATA: if (__is_valid_data_blkaddr(blkaddr)) @@ -404,8 +416,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr, if (blkaddr == NEW_ADDR && xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) return true; + if (compressed_cluster) + return true; break; case SEEK_HOLE: + if (compressed_cluster) + return false; if (blkaddr == NULL_ADDR) return true; break; @@ -416,7 +432,7 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr, static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); struct dnode_of_data dn; pgoff_t pgofs, end_offset; loff_t data_ofs = offset; @@ -474,7 +490,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; } - if (__found_offset(file->f_mapping, blkaddr, + if (__found_offset(file->f_mapping, &dn, pgofs, whence)) { f2fs_put_dnode(&dn); goto found; @@ -498,10 +514,7 @@ fail: static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; - - if (f2fs_compressed_file(inode)) - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); switch (whence) { case SEEK_SET: @@ -539,6 +552,42 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int finish_preallocate_blocks(struct inode *inode) +{ + int ret; + + inode_lock(inode); + if (is_inode_flag_set(inode, FI_OPENED_FILE)) { + inode_unlock(inode); + return 0; + } + + if (!file_should_truncate(inode)) { + set_inode_flag(inode, FI_OPENED_FILE); + inode_unlock(inode); + return 0; + } + + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + + truncate_setsize(inode, i_size_read(inode)); + ret = f2fs_truncate(inode); + + filemap_invalidate_unlock(inode->i_mapping); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + if (!ret) + set_inode_flag(inode, FI_OPENED_FILE); + + inode_unlock(inode); + if (ret) + return ret; + + file_dont_truncate(inode); + return 0; +} + static int f2fs_file_open(struct inode *inode, struct file *filp) { int err = fscrypt_file_open(inode, filp); @@ -553,10 +602,14 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; - return dquot_file_open(inode, filp); + err = dquot_file_open(inode, filp); + if (err) + return err; + + return finish_preallocate_blocks(inode); } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -568,8 +621,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); + block_t blkstart; + int blklen = 0; addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; + blkstart = le32_to_cpu(*addr); /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { @@ -585,24 +641,44 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) } if (blkaddr == NULL_ADDR) - continue; + goto next; f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE)) - continue; + if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) + goto next; + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) + goto next; if (compressed_cluster) valid_blocks++; } - f2fs_invalidate_blocks(sbi, blkaddr); + if (blkstart + blklen == blkaddr) { + blklen++; + } else { + f2fs_invalidate_blocks(sbi, blkstart, blklen); + blkstart = blkaddr; + blklen = 1; + } if (!released || blkaddr != COMPRESS_ADDR) nr_free++; + + continue; + +next: + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + + blkstart = le32_to_cpu(*(addr + 1)); + blklen = 0; } + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + if (compressed_cluster) f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); @@ -670,6 +746,11 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); + if (IS_DEVICE_ALIASING(inode) && from) { + err = -EINVAL; + goto out_err; + } + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= max_file_blocks(inode)) @@ -684,6 +765,19 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } + if (IS_DEVICE_ALIASING(inode)) { + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_info ei = et->largest; + + f2fs_invalidate_blocks(sbi, ei.blk, ei.len); + + dec_valid_block_count(sbi, inode, ei.len); + f2fs_update_time(sbi, REQ_TIME); + + f2fs_put_page(ipage, 1); + goto out; + } + if (f2fs_has_inline_data(inode)) { f2fs_truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); @@ -719,7 +813,7 @@ free_partial: /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from, truncate_page); - +out_err: trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -808,6 +902,12 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return true; if (f2fs_compressed_file(inode)) return true; + /* + * only force direct read to use buffered IO, for direct write, + * it expects inline data conversion before committing IO. + */ + if (f2fs_has_inline_data(inode) && rw == READ) + return true; /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) @@ -816,9 +916,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE)) - return true; - if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi)) + if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && + !f2fs_is_pinned_file(inode)) return true; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) return true; @@ -907,10 +1006,8 @@ static void __setattr_copy(struct mnt_idmap *idmap, inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); - if (!vfsgid_in_group_p(vfsgid) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; set_acl_inode(inode, mode); } @@ -923,6 +1020,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); int err; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) @@ -936,9 +1034,15 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ATTR_GID | ATTR_TIMES_SET)))) return -EPERM; - if ((attr->ia_valid & ATTR_SIZE) && - !f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if ((attr->ia_valid & ATTR_SIZE)) { + if (!f2fs_is_compress_backend_ready(inode) || + IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(fi->i_cluster_size))) + return -EINVAL; + } err = setattr_prepare(idmap, dentry, attr); if (err) @@ -990,7 +1094,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, attr->ia_size); @@ -1002,14 +1113,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * larger than i_size. */ filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; - spin_lock(&F2FS_I(inode)->i_size_lock); + spin_lock(&fi->i_size_lock); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - F2FS_I(inode)->last_disk_size = i_size_read(inode); - spin_unlock(&F2FS_I(inode)->i_size_lock); + fi->last_disk_size = i_size_read(inode); + spin_unlock(&fi->i_size_lock); } __setattr_copy(idmap, inode, attr); @@ -1019,7 +1130,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) - inode->i_mode = F2FS_I(inode)->i_acl_mode; + inode->i_mode = fi->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); } } @@ -1192,7 +1303,6 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; } @@ -1232,7 +1342,7 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr, 1); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1310,6 +1420,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } + + f2fs_wait_on_page_writeback(pdst, DATA, true, true); + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); set_page_private_gcing(pdst); @@ -1478,11 +1591,10 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); break; } - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); f2fs_set_data_blkaddr(dn, NEW_ADDR); } @@ -1662,10 +1774,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) } filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (ret) + return ret; /* write out all moved pages, if possible */ filemap_invalidate_lock(mapping); - filemap_write_and_wait_range(mapping, offset, LLONG_MAX); + ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); filemap_invalidate_unlock(mapping); @@ -1720,7 +1834,8 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, map.m_len = sec_blks; next_alloc: - if (has_not_enough_free_secs(sbi, 0, + if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? + ZONED_PIN_SEC_REQUIRED_COUNT : GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); @@ -1731,9 +1846,11 @@ next_alloc: f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + err = f2fs_allocate_pinning_section(sbi); + if (err) { + f2fs_up_write(&sbi->pin_sem); + goto out_err; + } map.m_seg_type = CURSEG_COLD_DATA_PINNED; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); @@ -1788,7 +1905,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ @@ -1799,15 +1916,6 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - /* - * Pinned file should not support partial truncation since the block - * can be used by applications. - */ - if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && - (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | - FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) - return -EOPNOTSUPP; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1815,10 +1923,27 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + /* + * Pinned file should not support partial truncation since the block + * can be used by applications. + */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file_modified(file); if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; @@ -1920,15 +2045,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (err) return err; - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&fi->i_sem); if (!f2fs_may_compress(inode) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return -EINVAL; } err = set_compress_context(inode); - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); if (err) return err; @@ -2047,10 +2172,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2066,7 +2193,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) inode_lock(inode); - if (!f2fs_disable_compressed_file(inode)) { + if (!f2fs_disable_compressed_file(inode) || + f2fs_is_pinned_file(inode)) { ret = -EINVAL; goto out; } @@ -2079,6 +2207,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[READ]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2088,37 +2217,33 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); + if (ret) + goto out_unlock; set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; } f2fs_write_inode(inode, NULL); @@ -2137,7 +2262,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) } f2fs_i_size_write(fi->cow_inode, isize); +out_unlock: + f2fs_up_write(&fi->i_gc_rwsem[READ]); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (ret) + goto out; f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; @@ -2155,6 +2284,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2187,6 +2319,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2205,34 +2340,13 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) return ret; } -static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly, bool need_lock) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - __u32 in; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__u32 __user *)arg)) - return -EFAULT; - - if (in != F2FS_GOING_DOWN_FULLSYNC) { - ret = mnt_want_write_file(filp); - if (ret) { - if (ret == -EROFS) { - ret = 0; - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_SHUTDOWN); - trace_f2fs_shutdown(sbi, in, ret); - } - return ret; - } - } - - switch (in) { + switch (flag) { case F2FS_GOING_DOWN_FULLSYNC: ret = bdev_freeze(sb->s_bdev); if (ret) @@ -2243,8 +2357,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); - if (ret) + if (ret) { + if (ret == -EIO) + ret = 0; goto out; + } f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: @@ -2260,24 +2377,73 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) set_sbi_flag(sbi, SBI_IS_DIRTY); /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); + if (ret == -EIO) + ret = 0; goto out; default: ret = -EINVAL; goto out; } + if (readonly) + goto out; + + /* + * grab sb->s_umount to avoid racing w/ remount() and other shutdown + * paths. + */ + if (need_lock) + down_write(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_write(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: - if (in != F2FS_GOING_DOWN_FULLSYNC) - mnt_drop_write_file(filp); - trace_f2fs_shutdown(sbi, in, ret); + trace_f2fs_shutdown(sbi, flag, ret); + + return ret; +} + +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + __u32 in; + int ret; + bool need_drop = false, readonly = false; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) { + if (ret != -EROFS) + return ret; + + /* fallback to nosync shutdown for readonly fs */ + in = F2FS_GOING_DOWN_NOSYNC; + readonly = true; + } else { + need_drop = true; + } + } + + ret = f2fs_do_shutdown(sbi, in, readonly, true); + + if (need_drop) + mnt_drop_write_file(filp); return ret; } @@ -2330,13 +2496,14 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + int ret; if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; + ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - - return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); + return ret; } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) @@ -2578,20 +2745,21 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, .m_may_create = false }; struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; - unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; bool fragmented = false; int err; - pg_start = range->start >> PAGE_SHIFT; - pg_end = (range->start + range->len) >> PAGE_SHIFT; - f2fs_balance_fs(sbi, true); inode_lock(inode); + pg_start = range->start >> PAGE_SHIFT; + pg_end = min_t(pgoff_t, + (range->start + range->len) >> PAGE_SHIFT, + DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2604,8 +2772,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } /* writeback all dirty pages in the range */ - err = filemap_write_and_wait_range(inode->i_mapping, range->start, - range->start + range->len - 1); + err = filemap_write_and_wait_range(inode->i_mapping, + pg_start << PAGE_SHIFT, + (pg_end << PAGE_SHIFT) - 1); if (err) goto out; @@ -2614,7 +2783,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } @@ -2687,7 +2856,8 @@ do_map: set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; - while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { + while (idx < map.m_lblk + map.m_len && + cnt < BLKS_PER_SEG(sbi)) { struct page *page; page = f2fs_get_lock_data_page(inode, idx, true); @@ -2696,6 +2866,8 @@ do_map: goto clear_out; } + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -2707,7 +2879,7 @@ do_map: map.m_lblk = idx; check: - if (map.m_lblk < pg_end && cnt < blk_per_seg) + if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) goto do_map; clear_inode_flag(inode, FI_SKIP_WRITES); @@ -2737,7 +2909,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) + if (!S_ISREG(inode->i_mode)) return -EINVAL; if (f2fs_readonly(sbi->sb)) @@ -2762,7 +2934,8 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) err = f2fs_defragment_range(sbi, filp, &range); mnt_drop_write_file(filp); - f2fs_update_time(sbi, REQ_TIME); + if (range.len) + f2fs_update_time(sbi, REQ_TIME); if (err < 0) return err; @@ -2813,11 +2986,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } - if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) { + if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || + f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { ret = -EOPNOTSUPP; goto out_unlock; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -2869,9 +3048,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, - pos_out >> F2FS_BLKSIZE_BITS, - len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), + F2FS_BYTES_TO_BLK(pos_out), + F2FS_BYTES_TO_BLK(len), false); if (!ret) { if (dst_max_i_size) @@ -2907,32 +3086,27 @@ out: static int __f2fs_ioc_move_range(struct file *filp, struct f2fs_move_range *range) { - struct fd dst; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; - dst = fdget(range->dst_fd); - if (!dst.file) + CLASS(fd, dst)(range->dst_fd); + if (fd_empty(dst)) return -EBADF; - if (!(dst.file->f_mode & FMODE_WRITE)) { - err = -EBADF; - goto err_out; - } + if (!(fd_file(dst)->f_mode & FMODE_WRITE)) + return -EBADF; err = mnt_want_write_file(filp); if (err) - goto err_out; + return err; - err = f2fs_move_file_range(filp, range->pos_in, dst.file, + err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), range->pos_out, range->len); mnt_drop_write_file(filp); -err_out: - fdput(dst); return err; } @@ -2976,8 +3150,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", + range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); return -EINVAL; } @@ -3165,24 +3339,27 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - /* Use i_gc_failures for normal file as a risk signal. */ - if (inc) - f2fs_i_gc_failures_write(inode, - fi->i_gc_failures[GC_FAILURE_PIN] + 1); + if (IS_DEVICE_ALIASING(inode)) + return -EINVAL; - if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + __func__, inode->i_ino, fi->i_gc_failures); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + return 0; } static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); __u32 pin; int ret = 0; @@ -3192,22 +3369,39 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; + if (!pin && IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); goto done; + } else if (f2fs_is_pinned_file(inode)) { + goto done; } - if (f2fs_should_update_outplace(inode, NULL)) { + if (F2FS_HAS_BLOCKS(inode)) { + ret = -EFBIG; + goto out; + } + + /* Let's allow file pinning on zoned device. */ + if (!f2fs_sb_has_blkzoned(sbi) && + f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } @@ -3227,9 +3421,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) } set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + ret = F2FS_I(inode)->i_gc_failures; done: - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -3242,10 +3436,16 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0; if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + pin = F2FS_I(inode)->i_gc_failures; return put_user(pin, (u32 __user *)arg); } +static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) +{ + return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0, + (u32 __user *)arg); +} + int f2fs_precache_extents(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -3438,10 +3638,8 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { @@ -3481,6 +3679,7 @@ next: static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int released_blocks = 0; @@ -3490,9 +3689,6 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3511,7 +3707,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3520,7 +3717,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + if (!atomic_read(&fi->i_compr_blocks)) { ret = -EPERM; goto out; } @@ -3529,7 +3726,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3538,9 +3735,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3552,12 +3752,14 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = release_compress_blocks(&dn, count); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3566,8 +3768,10 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); out: + if (released_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); mnt_drop_write_file(filp); @@ -3575,23 +3779,23 @@ out: if (ret >= 0) { ret = put_user(released_blocks, (u64 __user *)arg); } else if (released_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " "iblocks=%llu, released=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, released_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; @@ -3603,56 +3807,75 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { int compr_blocks = 0; - blkcnt_t reserved; + blkcnt_t reserved = 0; + blkcnt_t to_reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } + continue; } + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR) { + reserved++; + continue; + } if (__is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } + } - f2fs_set_data_blkaddr(dn, NEW_ADDR); + to_reserved = cluster_size - compr_blocks - reserved; + + /* for the case all blocks in cluster were reserved */ + if (reserved && to_reserved == 1) { + dn->ofs_in_node += cluster_size; + goto next; } - reserved = cluster_size - compr_blocks; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + ret = inc_valid_block_count(sbi, dn->inode, + &to_reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += to_reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int reserved_blocks = 0; @@ -3661,9 +3884,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3671,19 +3891,20 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_balance_fs(sbi, true); inode_lock(inode); - if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (atomic_read(&fi->i_compr_blocks)) + goto unlock_inode; + + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3692,9 +3913,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3706,43 +3930,45 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); } unlock_inode: + if (reserved_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); -out: mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " "iblocks=%llu, reserved=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, reserved_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -3805,7 +4031,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) return -EOPNOTSUPP; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || @@ -3877,8 +4105,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } @@ -3927,12 +4153,13 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (len) ret = f2fs_secure_erase(prev_bdev, inode, prev_index, prev_block, len, range.flags); + f2fs_update_time(sbi, REQ_TIME); out: filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -3967,6 +4194,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_comp_option option; int ret = 0; @@ -3981,16 +4209,22 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; @@ -4001,27 +4235,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) goto out; } - F2FS_I(inode)->i_compress_algorithm = option.algorithm; - F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; - F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); + fi->i_compress_algorithm = option.algorithm; + fi->i_log_cluster_size = option.log_cluster_size; + fi->i_cluster_size = BIT(option.log_cluster_size); /* Set default level */ - if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) - F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + if (fi->i_compress_algorithm == COMPRESS_ZSTD) + fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; else - F2FS_I(inode)->i_compress_level = 0; + fi->i_compress_level = 0; /* Adjust mount option level */ if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) f2fs_warn(sbi, "compression algorithm is successfully set, " "but current kernel doesn't support this algorithm."); out: - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4051,6 +4285,8 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) /* It will never fail, when page has pinned above */ f2fs_bug_on(F2FS_I_SB(inode), !page); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -4065,10 +4301,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4077,12 +4311,11 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4090,7 +4323,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4103,22 +4337,24 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4133,9 +4369,10 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4144,10 +4381,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4156,12 +4392,11 @@ static int f2fs_ioc_compress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4169,7 +4404,8 @@ static int f2fs_ioc_compress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4181,22 +4417,24 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; + + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4213,9 +4451,10 @@ static int f2fs_ioc_compress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4306,6 +4545,8 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_decompress_file(filp); case F2FS_IOC_COMPRESS_FILE: return f2fs_ioc_compress_file(filp); + case F2FS_IOC_GET_DEV_ALIAS_FILE: + return f2fs_ioc_get_dev_alias_file(filp, arg); default: return -ENOTTY; } @@ -4396,6 +4637,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_down_read(&fi->i_gc_rwsem[READ]); } + /* dio is not compatible w/ atomic file */ + if (f2fs_is_atomic_file(inode)) { + f2fs_up_read(&fi->i_gc_rwsem[READ]); + ret = -EOPNOTSUPP; + goto out; + } + /* * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of * the higher-level function iomap_dio_rw() in order to ensure that the @@ -4455,6 +4703,11 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode)) && + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) + inode_dio_wait(inode); + if (f2fs_should_use_dio(inode, iocb, to)) { ret = f2fs_dio_read_iter(iocb, to); } else { @@ -4569,9 +4822,11 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, else return 0; - map.m_may_create = true; + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; if (dio) { - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, + inode->i_write_hint); flag = F2FS_GET_BLOCK_PRE_DIO; } else { map.m_seg_type = NO_CHECK_TYPE; @@ -4619,8 +4874,21 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, return 0; } +static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, + struct bio *bio, loff_t file_offset) +{ + struct inode *inode = iter->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); + enum temp_type temp = f2fs_get_segment_temp(sbi, type); + + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); + submit_bio(bio); +} + static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { - .end_io = f2fs_dio_write_end_io, + .end_io = f2fs_dio_write_end_io, + .submit_io = f2fs_dio_write_submit_io, }; static void f2fs_flush_buffered_write(struct address_space *mapping, @@ -4757,6 +5025,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) bool dio; bool may_need_sync = true; int preallocated; + const loff_t pos = iocb->ki_pos; + const ssize_t count = iov_iter_count(from); ssize_t ret; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { @@ -4778,6 +5048,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } + if (f2fs_is_pinned_file(inode) && + !f2fs_overwrite_io(inode, pos, count)) { + ret = -EIO; + goto out_unlock; + } + ret = f2fs_write_checks(iocb, from); if (ret <= 0) goto out_unlock; @@ -4785,6 +5061,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Determine whether we will do a direct write or a buffered write. */ dio = f2fs_should_use_dio(inode, iocb, from); + /* dio is not compatible w/ atomic write */ + if (dio && f2fs_is_atomic_file(inode)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); preallocated = f2fs_preallocate_blocks(iocb, from, dio); @@ -4978,6 +5260,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_COMPRESS_OPTION: case F2FS_IOC_DECOMPRESS_FILE: case F2FS_IOC_COMPRESS_FILE: + case F2FS_IOC_GET_DEV_ALIAS_FILE: break; default: return -ENOIOCTLCMD; @@ -5004,4 +5287,5 @@ const struct file_operations f2fs_file_operations = { .splice_read = f2fs_file_splice_read, .splice_write = iter_file_splice_write, .fadvise = f2fs_file_fadvise, + .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a079eebfb080..faf9fa1c804d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -81,6 +81,8 @@ static int gc_thread_func(void *data) continue; } + gc_control.one_time = false; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -116,15 +118,30 @@ static int gc_thread_func(void *data) goto next; } - if (has_enough_invalid_blocks(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { + if (has_enough_free_blocks(sbi, + gc_th->no_zoned_gc_percent)) { + wait_ms = gc_th->no_gc_sleep_time; + f2fs_up_write(&sbi->gc_lock); + goto next; + } + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->max_sleep_time; + } + + if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); - else + if (f2fs_sb_has_blkzoned(sbi)) + gc_control.one_time = true; + } else { increase_sleep_time(gc_th, &wait_ms); + } do_gc: stat_inc_gc_call_count(sbi, foreground ? FOREGROUND : BACKGROUND); - sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || + gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground) @@ -179,9 +196,21 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + + if (f2fs_sb_has_blkzoned(sbi)) { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC; + gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC; + } else { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->no_zoned_gc_percent = 0; + gc_th->boost_zoned_gc_percent = 0; + } gc_th->gc_wake = false; @@ -228,6 +257,8 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) switch (sbi->gc_mode) { case GC_IDLE_CB: + case GC_URGENT_LOW: + case GC_URGENT_MID: gc_mode = GC_CB; break; case GC_IDLE_GREEDY: @@ -259,7 +290,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi, gc_type); - p->ofs_unit = sbi->segs_per_sec; + p->ofs_unit = SEGS_PER_SEC(sbi); if (__is_large_section(sbi)) { p->dirty_bitmap = dirty_i->dirty_secmap; p->max_search = count_bits(p->dirty_bitmap, @@ -280,11 +311,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first in no_heap mode*/ + /* let's select beginning hot/small space first. */ if (f2fs_need_rand_seg(sbi)) - p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); - else if (test_opt(sbi, NOHEAP) && - (type == CURSEG_HOT_DATA || IS_NODESEG(type))) + p->offset = get_random_u32_below(MAIN_SECS(sbi) * + SEGS_PER_SEC(sbi)); + else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; @@ -295,13 +326,13 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, { /* SSR allocates in a segment unit */ if (p->alloc_mode == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (p->alloc_mode == AT_SSR) return UINT_MAX; /* LFS */ if (p->gc_mode == GC_GREEDY) - return 2 * sbi->blocks_per_seg * p->ofs_unit; + return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit); else if (p->gc_mode == GC_CB) return UINT_MAX; else if (p->gc_mode == GC_AT) @@ -332,23 +363,18 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; unsigned char u; - unsigned int i; - unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); - for (i = 0; i < usable_segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); vblocks = get_valid_blocks(sbi, segno, true); - - mtime = div_u64(mtime, usable_segs_per_sec); vblocks = div_u64(vblocks, usable_segs_per_sec); - u = (vblocks * 100) >> sbi->log_blocks_per_seg; + u = BLKS_TO_SEGS(sbi, vblocks * 100); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -368,6 +394,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / + 100)) + return UINT_MAX; + /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) return get_valid_blocks(sbi, segno, true); @@ -485,10 +516,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, struct victim_sel_policy *p, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; - unsigned int i; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (p->gc_mode == GC_AT && @@ -496,9 +524,8 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, return; } - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -599,7 +626,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, unsigned long long age; unsigned long long max_mtime = sit_i->dirty_max_mtime; unsigned long long min_mtime = sit_i->dirty_min_mtime; - unsigned int seg_blocks = sbi->blocks_per_seg; unsigned int vblocks; unsigned int dirty_threshold = max(am->max_candidate_count, am->candidate_ratio * @@ -629,7 +655,7 @@ next_node: f2fs_bug_on(sbi, !vblocks); /* rare case */ - if (vblocks == seg_blocks) + if (vblocks == BLKS_PER_SEG(sbi)) goto skip_node; iter++; @@ -743,7 +769,7 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age) + unsigned long long age, bool one_time) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); @@ -755,11 +781,12 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int ret = 0; mutex_lock(&dirty_i->seglist_lock); - last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; + last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; + p.one_time_gc = one_time; retry: select_policy(sbi, gc_type, type, &p); @@ -779,11 +806,14 @@ retry: goto out; } - if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) { ret = -EBUSY; - else - p.min_segno = *result; - goto out; + goto out; + } + if (gc_type == FG_GC) + clear_bit(GET_SEC_FROM_SEG(sbi, *result), dirty_i->victim_secmap); + p.min_segno = *result; + goto got_result; } ret = -ENODATA; @@ -896,7 +926,7 @@ next: else sm->last_victim[p.gc_mode] = segno + p.ofs_unit; sm->last_victim[p.gc_mode] %= - (MAIN_SECS(sbi) * sbi->segs_per_sec); + (MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); break; } } @@ -1172,7 +1202,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; struct page *page; struct f2fs_io_info fio = { @@ -1184,7 +1215,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; int err; @@ -1197,7 +1227,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } goto got_it; @@ -1216,7 +1245,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } got_it: @@ -1264,6 +1292,8 @@ put_page: static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1273,7 +1303,6 @@ static int move_data_block(struct inode *inode, block_t bidx, .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -1287,7 +1316,7 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); + page = f2fs_grab_cache_page(mapping, bidx, false); if (!page) return -ENOMEM; @@ -1364,8 +1393,13 @@ static int move_data_block(struct inode *inode, block_t bidx, set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* allocate block address */ - f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, + err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, type, NULL); + if (err) { + f2fs_put_page(mpage, 1); + /* filesystem should shutdown, no need to recovery block */ + goto up_out; + } fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -1381,7 +1415,7 @@ static int move_data_block(struct inode *inode, block_t bidx, page_address(mpage), PAGE_SIZE); f2fs_put_page(mpage, 1); - f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr); + f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr, 1); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) @@ -1393,18 +1427,12 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); - if (fio.retry) { - err = -EAGAIN; - if (PageWriteback(fio.encrypted_page)) - end_page_writeback(fio.encrypted_page); - goto put_page_out; - } f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); -put_page_out: + f2fs_put_page(fio.encrypted_page, 1); recover_block: if (err) @@ -1440,7 +1468,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; if (gc_type == BG_GC) { - if (PageWriteback(page)) { + if (folio_test_writeback(page_folio(page))) { err = -EAGAIN; goto out; } @@ -1560,9 +1588,24 @@ next_step: int err; inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode) || - special_file(inode->i_mode)) + if (IS_ERR(inode)) + continue; + + if (is_bad_inode(inode) || + special_file(inode->i_mode)) { + iput(inode); + continue; + } + + if (f2fs_has_inline_data(inode)) { + iput(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "inode %lx has both inline_data flag and " + "data block, nid=%u, ofs_in_node=%u", + inode->i_ino, dni.nid, ofs_in_node); continue; + } err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { @@ -1580,7 +1623,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) { + if (f2fs_meta_inode_gc_required(inode)) { int err = ra_data_block(inode, start_bidx); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1631,7 +1674,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) + if (f2fs_meta_inode_gc_required(inode)) err = move_data_block(inode, start_bidx, gc_type, segno, off); else @@ -1639,7 +1682,7 @@ next_step: segno, off); if (!err && (gc_type == FG_GC || - f2fs_post_read_required(inode))) + f2fs_meta_inode_gc_required(inode))) submitted++; if (locked) { @@ -1658,13 +1701,14 @@ next_step: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type) + int gc_type, bool one_time) { struct sit_info *sit_i = SIT_I(sbi); int ret; down_write(&sit_i->sentry_lock); - ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, + LFS, 0, one_time); up_write(&sit_i->sentry_lock); return ret; } @@ -1672,30 +1716,49 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, struct gc_inode_list *gc_list, int gc_type, - bool force_migrate) + bool force_migrate, bool one_time) { struct page *sum_page; struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; - unsigned int end_segno = start_segno + sbi->segs_per_sec; + unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); + unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; int submitted = 0; - if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, sbi->segs_per_sec); + if (__is_large_section(sbi)) { + sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); - /* - * zone-capacity can be less than zone-size in zoned devices, - * resulting in less than expected usable segments in the zone, - * calculate the end segno in the zone which can be garbage collected - */ - if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= sbi->segs_per_sec - - f2fs_usable_segs_in_sec(sbi, segno); + /* + * zone-capacity can be less than zone-size in zoned devices, + * resulting in less than expected usable segments in the zone, + * calculate the end segno in the zone which can be garbage + * collected + */ + if (f2fs_sb_has_blkzoned(sbi)) + sec_end_segno -= SEGS_PER_SEC(sbi) - + f2fs_usable_segs_in_sec(sbi); + + if (gc_type == BG_GC || one_time) { + unsigned int window_granularity = + sbi->migration_window_granularity; + + if (f2fs_sb_has_blkzoned(sbi) && + !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent)) + window_granularity *= + BOOST_GC_MULTIPLE; + + end_segno = start_segno + window_granularity; + } + + if (end_segno > sec_end_segno) + end_segno = sec_end_segno; + } sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); @@ -1743,7 +1806,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (type != GET_SUM_TYPE((&sum->footer))) { f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_SUMMARY); goto skip; @@ -1775,7 +1837,8 @@ freed: if (__is_large_section(sbi)) sbi->next_victim_seg[gc_type] = - (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; + (segno + 1 < sec_end_segno) ? + segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } @@ -1852,7 +1915,7 @@ gc_more: goto stop; } retry: - ret = __get_victim(sbi, &segno, gc_type); + ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ if (ret == -ENODATA && gc_type == FG_GC && @@ -1864,17 +1927,21 @@ retry: } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, - gc_control->should_migrate_blocks); + gc_control->should_migrate_blocks, + gc_control->one_time); if (seg_freed < 0) goto stop; total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { + if (seg_freed == f2fs_usable_segs_in_sec(sbi)) { sec_freed++; total_sec_freed++; } + if (gc_control->one_time) + goto stop; + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -1983,10 +2050,42 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) init_atgc_management(sbi); } +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections) +{ + unsigned int segno; + unsigned int gc_secs = dry_run_sections; + + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); + put_gc_inode(&gc_list); + + if (!dry_run && get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + if (dry_run && dry_run_sections && + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) + break; + + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + } + + return 0; +} + static int free_segment_range(struct f2fs_sb_info *sbi, - unsigned int secs, bool gc_only) + unsigned int secs, bool dry_run) { - unsigned int segno, next_inuse, start, end; + unsigned int next_inuse, start, end; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; int gc_mode, gc_type; int err = 0; @@ -1994,7 +2093,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, /* Force block allocation for GC */ MAIN_SECS(sbi) -= secs; - start = MAIN_SECS(sbi) * sbi->segs_per_sec; + start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); end = MAIN_SEGS(sbi) - 1; mutex_lock(&DIRTY_I(sbi)->seglist_lock); @@ -2008,29 +2107,15 @@ static int free_segment_range(struct f2fs_sb_info *sbi, mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ - for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) - f2fs_allocate_segment_for_resize(sbi, type, start, end); - - /* do GC to move out valid blocks in the range */ - for (segno = start; segno <= end; segno += sbi->segs_per_sec) { - struct gc_inode_list gc_list = { - .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), - }; - - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); - put_gc_inode(&gc_list); - - if (!gc_only && get_valid_blocks(sbi, segno, true)) { - err = -EAGAIN; - goto out; - } - if (fatal_signal_pending(current)) { - err = -ERESTARTSYS; + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) { + err = f2fs_allocate_segment_for_resize(sbi, type, start, end); + if (err) goto out; - } } - if (gc_only) + + /* do GC to move out valid blocks in the range */ + err = f2fs_gc_range(sbi, start, end, dry_run, 0); + if (err || dry_run) goto out; stat_inc_cp_call_count(sbi, TOTAL_CALL); @@ -2056,7 +2141,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) int segment_count; int segment_count_main; long long block_count; - int segs = secs * sbi->segs_per_sec; + int segs = secs * SEGS_PER_SEC(sbi); f2fs_down_write(&sbi->sb_lock); @@ -2069,7 +2154,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + - (long long)segs * sbi->blocks_per_seg); + (long long)SEGS_TO_BLKS(sbi, segs)); if (f2fs_is_multi_device(sbi)) { int last_dev = sbi->s_ndevs - 1; int dev_segs = @@ -2084,8 +2169,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { - int segs = secs * sbi->segs_per_sec; - long long blks = (long long)segs * sbi->blocks_per_seg; + int segs = secs * SEGS_PER_SEC(sbi); + long long blks = SEGS_TO_BLKS(sbi, segs); long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -2127,7 +2212,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) int last_dev = sbi->s_ndevs - 1; __u64 last_segs = FDEV(last_dev).total_segments; - if (block_count + last_segs * sbi->blocks_per_seg <= + if (block_count + SEGS_TO_BLKS(sbi, last_segs) <= old_block_count) return -EINVAL; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 28a00942802c..5c1eaf55e127 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -15,17 +15,30 @@ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +/* GC sleep parameters for zoned deivces */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10 +#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000 + /* choose candidates from sections which has age of more than 7 days */ #define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7) #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ +#define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ +#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 +#define BOOST_GC_MULTIPLE 5 +#define ZONED_PIN_SEC_REQUIRED_COUNT 1 + #define DEF_GC_FAILED_PINNED_FILES 2048 +#define MAX_GC_FAILED_PINNED_FILES USHRT_MAX /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ @@ -50,6 +63,11 @@ struct f2fs_gc_kthread { * caller of f2fs_balance_fs() * will wait on this wait queue. */ + + /* for gc control for zoned devices */ + unsigned int no_zoned_gc_percent; + unsigned int boost_zoned_gc_percent; + unsigned int valid_thresh_ratio; }; struct gc_inode_list { @@ -96,7 +114,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi) if (f2fs_sb_has_blkzoned(sbi)) return free_segs_blk_count_zoned(sbi); - return free_segments(sbi) << sbi->log_blocks_per_seg; + return SEGS_TO_BLKS(sbi, free_segments(sbi)); } static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) @@ -104,7 +122,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) block_t free_blks, ovp_blks; free_blks = free_segs_blk_count(sbi); - ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi)); if (free_blks < ovp_blks) return 0; @@ -151,6 +169,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, *wait -= min_time; } +static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi, + unsigned int limit_perc) +{ + return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100); +} + static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { block_t user_block_count = sbi->user_block_count; @@ -166,3 +190,10 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) free_user_blocks(sbi) < limit_free_user_blocks(invalid_user_blocks)); } + +static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) +{ + if (f2fs_sb_has_blkzoned(sbi)) + return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return has_enough_invalid_blocks(sbi); +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ac00423f117b..3e3c35d4c98b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; @@ -33,11 +33,29 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -bool f2fs_sanity_check_inline_data(struct inode *inode) +static bool inode_has_blocks(struct inode *inode, struct page *ipage) +{ + struct f2fs_inode *ri = F2FS_INODE(ipage); + int i; + + if (F2FS_HAS_BLOCKS(inode)) + return true; + + for (i = 0; i < DEF_NIDS_PER_INODE; i++) { + if (ri->i_nid[i]) + return true; + } + return false; +} + +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage) { if (!f2fs_has_inline_data(inode)) return false; + if (inode_has_blocks(inode, ipage)) + return false; + if (!support_inline_data(inode)) return true; @@ -61,22 +79,22 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void f2fs_do_read_inline_data(struct page *page, struct page *ipage) +void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) return; - f2fs_bug_on(F2FS_P_SB(page), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio_index(folio)); - zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); + folio_zero_segment(folio, MAX_INLINE_DATA(inode), folio_size(folio)); /* Copy the whole inline data block */ - memcpy_to_page(page, 0, inline_data_addr(inode, ipage), + memcpy_to_folio(folio, 0, inline_data_addr(inode, ipage), MAX_INLINE_DATA(inode)); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); } void f2fs_truncate_inline_inode(struct inode *inode, @@ -97,13 +115,13 @@ void f2fs_truncate_inline_inode(struct inode *inode, clear_inode_flag(inode, FI_DATA_EXIST); } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +int f2fs_read_inline_data(struct inode *inode, struct folio *folio) { struct page *ipage; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { - unlock_page(page); + folio_unlock(folio); return PTR_ERR(ipage); } @@ -112,15 +130,15 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return -EAGAIN; } - if (page->index) - zero_user_segment(page, 0, PAGE_SIZE); + if (folio_index(folio)) + folio_zero_segment(folio, 0, folio_size(folio)); else - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(folio, ipage); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); f2fs_put_page(ipage, 1); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -164,9 +182,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) return -EFSCORRUPTED; } - f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); + f2fs_bug_on(F2FS_P_SB(page), folio_test_writeback(page_folio(page))); - f2fs_do_read_inline_data(page, dn->inode_page); + f2fs_do_read_inline_data(page_folio(page), dn->inode_page); set_page_dirty(page); /* clear dirty state */ @@ -203,8 +221,10 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; - if (!f2fs_has_inline_data(inode) || - f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + return -EROFS; + + if (!f2fs_has_inline_data(inode)) return 0; err = f2fs_dquot_initialize(inode); @@ -240,35 +260,34 @@ out: return err; } -int f2fs_write_inline_data(struct inode *inode, struct page *page) +int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { - struct dnode_of_data dn; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *ipage; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); - if (err) - return err; + ipage = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); + f2fs_put_page(ipage, 1); return -EAGAIN; } - f2fs_bug_on(F2FS_I_SB(inode), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, dn.inode_page), - page, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(dn.inode_page); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); + memcpy_from_folio(inline_data_addr(inode, ipage), + folio, 0, MAX_INLINE_DATA(inode)); + set_page_dirty(ipage); - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(dn.inode_page); - f2fs_put_dnode(&dn); + clear_page_private_inline(ipage); + f2fs_put_page(ipage, 1); return 0; } @@ -333,7 +352,8 @@ process_inline: struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct page **res_page, + bool use_hash) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_dir_entry *de; @@ -350,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(&d, fname, NULL); + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); unlock_page(ipage); if (IS_ERR(de)) { *res_page = ERR_CAST(de); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c26effdce9aa..3dd25f64d6f1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/sched/mm.h> #include <linux/lz4.h> @@ -29,9 +28,17 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (is_inode_flag_set(inode, FI_NEW_INODE)) return; + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; + if (f2fs_is_atomic_file(inode)) { + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return; + } + mark_inode_dirty_sync(inode); } @@ -161,7 +168,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (!f2fs_enable_inode_chksum(sbi, page)) #else if (!f2fs_enable_inode_chksum(sbi, page) || - PageDirty(page) || PageWriteback(page)) + PageDirty(page) || + folio_test_writeback(page_folio(page))) #endif return true; @@ -171,7 +179,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + page_folio(page)->index, ino_of_node(page), + provided, calculated); return provided == calculated; } @@ -293,15 +302,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi) && - f2fs_has_inline_xattr(inode) && - (!fi->i_inline_xattr_size || - fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); - return false; - } if (f2fs_sb_has_compression(sbi) && fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, @@ -309,9 +309,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (!sanity_check_compress_inode(inode, ri)) return false; } - } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); + } + + if (f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE); return false; } @@ -343,7 +349,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode)) { + if (f2fs_sanity_check_inline_data(inode, node_page)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; @@ -361,6 +367,25 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + + if (IS_DEVICE_ALIASING(inode)) { + if (!f2fs_sb_has_device_alias(sbi)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", + __func__, inode->i_ino); + return false; + } + if (!f2fs_is_pinned_file(inode)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", + __func__, inode->i_ino); + return false; + } + } + return true; } @@ -408,8 +433,7 @@ static int do_read_inode(struct inode *inode) if (S_ISDIR(inode->i_mode)) fi->i_current_depth = le32_to_cpu(ri->i_current_depth); else if (S_ISREG(inode->i_mode)) - fi->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(ri->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) @@ -502,16 +526,16 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - /* Need all the flag bits */ - f2fs_init_read_extent_tree(inode, node_page); - f2fs_init_age_extent_tree(inode); - - if (!sanity_check_extent_cache(inode)) { + if (!sanity_check_extent_cache(inode, node_page)) { f2fs_put_page(node_page, 1); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -604,14 +628,6 @@ make_now: } f2fs_set_inode_flags(inode); - if (file_should_truncate(inode) && - !is_sbi_flag_set(sbi, SBI_POR_DOING)) { - ret = f2fs_truncate(inode); - if (ret) - goto bad_inode; - file_dont_truncate(inode); - } - unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -639,8 +655,9 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_tree *et = fi->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); @@ -650,7 +667,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri = F2FS_INODE(node_page); ri->i_mode = cpu_to_le16(inode->i_mode); - ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_advise = fi->i_advise; ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); @@ -676,59 +693,49 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); if (S_ISDIR(inode->i_mode)) - ri->i_current_depth = - cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_current_depth = cpu_to_le32(fi->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = - cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); - ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); - ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); - ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures); + ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid); + ri->i_flags = cpu_to_le32(fi->i_flags); + ri->i_pino = cpu_to_le32(fi->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - ri->i_dir_level = F2FS_I(inode)->i_dir_level; + ri->i_dir_level = fi->i_dir_level; if (f2fs_has_extra_attr(inode)) { - ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize); if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = - cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + cpu_to_le16(fi->i_inline_xattr_size); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_projid)) { + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) { projid_t i_projid; - i_projid = from_kprojid(&init_user_ns, - F2FS_I(inode)->i_projid); + i_projid = from_kprojid(&init_user_ns, fi->i_projid); ri->i_projid = cpu_to_le32(i_projid); } if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_crtime)) { - ri->i_crtime = - cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); - ri->i_crtime_nsec = - cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec); + ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec); } if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_compress_flag)) { unsigned short compress_flag; - ri->i_compr_blocks = - cpu_to_le64(atomic_read( - &F2FS_I(inode)->i_compr_blocks)); - ri->i_compress_algorithm = - F2FS_I(inode)->i_compress_algorithm; - compress_flag = F2FS_I(inode)->i_compress_flag | - F2FS_I(inode)->i_compress_level << + ri->i_compr_blocks = cpu_to_le64( + atomic_read(&fi->i_compr_blocks)); + ri->i_compress_algorithm = fi->i_compress_algorithm; + compress_flag = fi->i_compress_flag | + fi->i_compress_level << COMPRESS_LEVEL_OFFSET; ri->i_compress_flag = cpu_to_le16(compress_flag); - ri->i_log_cluster_size = - F2FS_I(inode)->i_log_cluster_size; + ri->i_log_cluster_size = fi->i_log_cluster_size; } } @@ -782,8 +789,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages @@ -804,11 +813,13 @@ void f2fs_evict_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t xnid = fi->i_xattr_nid; int err = 0; + bool freeze_protected = false; f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } @@ -828,7 +839,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); - f2fs_destroy_extent_tree(inode); + if (!IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; @@ -843,8 +855,10 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) { sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -882,12 +896,15 @@ retry: goto retry; } + if (IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); + if (err) { f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (freeze_protected) sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b3bb815fc6aa..a278c7da8177 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct f2fs_inode_info *fi; nid_t ino; struct inode *inode; bool nid_free = false; @@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, inode_init_owner(idmap, inode, dir, mode); + fi = F2FS_I(inode); inode->i_ino = ino; inode->i_blocks = 0; simple_inode_init_ts(inode); - F2FS_I(inode)->i_crtime = inode_get_mtime(inode); + fi->i_crtime = inode_get_mtime(inode); inode->i_generation = get_random_u32(); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_current_depth = 1; + fi->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { @@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) - F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + fi->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + fi->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); - F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) @@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, f2fs_has_inline_dentry(inode)) { xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } - F2FS_I(inode)->i_inline_xattr_size = xattr_size; + fi->i_inline_xattr_size = xattr_size; - F2FS_I(inode)->i_flags = + fi->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; + fi->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); /* Check compression first. */ @@ -339,6 +341,7 @@ fail_drop: trace_f2fs_new_inode(inode, err); dquot_drop(inode); inode->i_flags |= S_NOQUOTA; + make_bad_inode(inode); if (nid_free) set_inode_flag(inode, FI_FREE_NID); clear_nlink(inode); @@ -455,62 +458,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot_name, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -520,7 +467,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -531,7 +477,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = f2fs_prepare_lookup(dir, dentry, &fname); - generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) goto out_splice; if (err) @@ -557,17 +502,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { @@ -577,8 +511,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: -#if IS_ENABLED(CONFIG_UNICODE) - if (!inode && IS_CASEFOLDED(dir)) { + if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as * well. For now, prevent the negative dentry @@ -587,7 +520,7 @@ out_splice: trace_f2fs_lookup_end(dir, dentry, ino, err); return NULL; } -#endif + new = d_splice_alias(inode, dentry); trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry, ino, IS_ERR(new) ? PTR_ERR(new) : err); @@ -640,16 +573,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); -#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. */ - if (IS_CASEFOLDED(dir)) + if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); -#endif + if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: @@ -852,7 +784,7 @@ out: static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode, bool is_whiteout, - struct inode **new_inode) + struct inode **new_inode, struct f2fs_filename *fname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -880,7 +812,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out; - err = f2fs_do_tmpfile(inode, dir); + err = f2fs_do_tmpfile(inode, dir, fname); if (err) goto release_out; @@ -931,22 +863,24 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL); + err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL); return finish_open_simple(file, err); } static int f2fs_create_whiteout(struct mnt_idmap *idmap, - struct inode *dir, struct inode **whiteout) + struct inode *dir, struct inode **whiteout, + struct f2fs_filename *fname) { - return __f2fs_tmpfile(idmap, dir, NULL, - S_IFCHR | WHITEOUT_MODE, true, whiteout); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE, + true, whiteout, fname); } int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode) { - return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, + false, new_inode, NULL); } static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, @@ -990,7 +924,14 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(idmap, old_dir, &whiteout); + struct f2fs_filename fname; + + err = f2fs_setup_filename(old_dir, &old_dentry->d_name, + 0, &fname); + if (err) + return err; + + err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); if (err) return err; } @@ -1105,14 +1046,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, iput(whiteout); } - if (old_is_dir) { - if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, - old_dir_page, new_dir); - else - f2fs_put_page(old_dir_page, 0); + if (old_dir_entry) + f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + if (old_is_dir) f2fs_i_links_write(old_dir, false); - } + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); if (S_ISDIR(old_inode->i_mode)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9b546fd21010..f88392fc4ba9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -20,7 +20,7 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -123,7 +123,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } @@ -558,6 +558,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, block_t blkaddr; int i; + ni->flag = 0; ni->nid = nid; retry: /* Check nat cache */ @@ -852,21 +853,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node = round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); if (blkaddr == COMPRESS_ADDR) blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; @@ -897,8 +906,18 @@ static int truncate_node(struct dnode_of_data *dn) if (err) return err; + if (ni.blk_addr != NEW_ADDR && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { + f2fs_err_ratelimited(sbi, + "nat entry is corrupted, run fsck to fix it, ino:%u, " + "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } + /* Deallocate node address */ - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -911,7 +930,7 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = dn->node_page->index; + index = page_folio(dn->node_page)->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), @@ -1048,7 +1067,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, int i; int idx = depth - 2; - nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + nid[0] = get_nid(dn->inode_page, offset[0], true); if (!nid[0]) return 0; @@ -1159,7 +1178,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + dn.nid = get_nid(page, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1179,15 +1198,22 @@ skip_partial: default: BUG(); } - if (err < 0 && err != -ENOENT) + if (err == -ENOENT) { + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_err_ratelimited(sbi, + "truncate node fail, ino:%lu, nid:%u, " + "offset[0]:%d, offset[1]:%d, nofs:%d", + inode->i_ino, dn.nid, offset[0], + offset[1], nofs); + err = 0; + } + if (err < 0) goto fail; - if (offset[1] == 0 && - ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { + if (offset[1] == 0 && get_nid(page, offset[0], true)) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true, true); - ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; - set_page_dirty(page); + set_nid(page, offset[0], 0, true); unlock_page(page); } offset[1] = 0; @@ -1249,8 +1275,9 @@ int f2fs_remove_inode_page(struct inode *inode) } /* remove potential inline_data blocks */ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) + if (!IS_DEVICE_ALIASING(inode) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ @@ -1311,8 +1338,14 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) } if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; + dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_warn_ratelimited(sbi, + "f2fs_new_node_page: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + new_ni.ino, new_ni.nid, new_ni.blk_addr, + new_ni.version, new_ni.flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); goto fail; } #endif @@ -1337,7 +1370,6 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); return page; - fail: clear_node_page_dirty(page); f2fs_put_page(page, 1); @@ -1351,6 +1383,7 @@ fail: */ static int read_node_page(struct page *page, blk_opf_t op_flags) { + struct folio *folio = page_folio(page); struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { @@ -1363,21 +1396,21 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) }; int err; - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!f2fs_inode_chksum_verify(sbi, page)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -EFSBADCRC; } return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni, false); + err = f2fs_get_node_info(sbi, folio->index, &ni, false); if (err) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -ENOENT; } @@ -1474,7 +1507,7 @@ out_err: out_put_err: /* ENOENT comes from read_node_page which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page->index, NODE); + f2fs_handle_page_eio(sbi, page_folio(page), NODE); f2fs_put_page(page, 1); return ERR_PTR(err); } @@ -1517,7 +1550,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!clear_page_dirty_for_io(page)) goto page_out; - ret = f2fs_write_inline_data(inode, page); + ret = f2fs_write_inline_data(inode, page_folio(page)); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) @@ -1590,6 +1623,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { @@ -1606,15 +1640,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page, NODE); + trace_f2fs_writepage(folio, NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1628,7 +1662,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); + f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; @@ -1642,10 +1676,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1656,7 +1690,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ @@ -1666,7 +1700,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, *seq_id = seq; } - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); @@ -1679,7 +1713,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, NODE); @@ -1693,7 +1727,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } @@ -1725,7 +1759,7 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) goto release_page; } else { /* set page dirty and write it */ - if (!PageWriteback(node_page)) + if (!folio_test_writeback(page_folio(node_page))) set_page_dirty(node_page); } out_page: @@ -1849,7 +1883,7 @@ continue_unlock: } if (!ret && atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + ino, page_folio(last_page)->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -1919,7 +1953,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) for (i = 0; i < nr_folios; i++) { struct page *page = &fbatch.folios[i]->page; - if (!IS_DNODE(page)) + if (!IS_INODE(page)) continue; lock_page(page); @@ -2153,7 +2187,7 @@ skip_write: static bool f2fs_dirty_node_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, NODE); + trace_f2fs_set_page_dirty(folio, NODE); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -2731,7 +2765,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) if (err) return err; - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -2841,7 +2875,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, int i, idx, last_offset, nrpages; /* scan the node segment */ - last_offset = sbi->blocks_per_seg; + last_offset = BLKS_PER_SEG(sbi); addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; @@ -3148,7 +3182,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, - nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); + F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3158,7 +3192,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) return 0; - nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - + nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { struct page *page; @@ -3167,7 +3201,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (IS_ERR(page)) return PTR_ERR(page); - memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), page_address(page), F2FS_BLKSIZE); f2fs_put_page(page, 1); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5bd16a95eef8..6aea13024ac1 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -208,10 +208,10 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) block_addr = (pgoff_t)(nm_i->nat_blkaddr + (block_off << 1) - - (block_off & (sbi->blocks_per_seg - 1))); + (block_off & (BLKS_PER_SEG(sbi) - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - block_addr += sbi->blocks_per_seg; + block_addr += BLKS_PER_SEG(sbi); return block_addr; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d0f24ccbd1ac..69a2027e3ebc 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -5,7 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/mm.h> @@ -46,10 +46,6 @@ static struct kmem_cache *fsync_entry_slab; -#if IS_ENABLED(CONFIG_UNICODE) -extern struct kmem_cache *f2fs_cf_name_slab; -#endif - bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) { s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count); @@ -153,11 +149,8 @@ static int init_recovered_filename(const struct inode *dir, if (err) return err; f2fs_hash_filename(dir, fname); -#if IS_ENABLED(CONFIG_UNICODE) /* Case-sensitive match is fine for recovery */ - kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); - fname->cf_name.name = NULL; -#endif + f2fs_free_casefolded_name(fname); } else { f2fs_hash_filename(dir, fname); } @@ -287,6 +280,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; @@ -309,12 +303,12 @@ static int recover_inode(struct inode *inode, struct page *page) i_projid = (projid_t)le32_to_cpu(raw->i_projid); kprojid = make_kprojid(&init_user_ns, i_projid); - if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + if (!projid_eq(kprojid, fi->i_projid)) { err = f2fs_transfer_project_quota(inode, kprojid); if (err) return err; - F2FS_I(inode)->i_projid = kprojid; + fi->i_projid = kprojid; } } } @@ -327,11 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page) inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), le32_to_cpu(raw->i_mtime_nsec)); - F2FS_I(inode)->i_advise = raw->i_advise; - F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + fi->i_advise = raw->i_advise; + fi->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(raw->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); @@ -354,7 +347,7 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, if (blkaddr + 1 == next_blkaddr) ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, ra_blocks * 2); - else if (next_blkaddr % sbi->blocks_per_seg) + else if (next_blkaddr % BLKS_PER_SEG(sbi)) ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, ra_blocks / 2); return ra_blocks; @@ -611,6 +604,19 @@ truncate_out: return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page) { @@ -680,14 +686,12 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } @@ -712,14 +716,8 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; continue; @@ -727,16 +725,8 @@ retry_dn: /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } @@ -756,8 +746,6 @@ retry_prev: f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", dest, inode->i_ino, dn.ofs_in_node); err = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto err; } @@ -852,7 +840,7 @@ next: f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } if (!err) - f2fs_allocate_new_segments(sbi); + err = f2fs_allocate_new_segments(sbi); return err; } @@ -864,7 +852,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; - bool fix_curseg_write_pointer = false; if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); @@ -895,8 +882,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) else f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); skip: - fix_curseg_write_pointer = !check_only || list_empty(&inode_list); - destroy_fsync_dnodes(&inode_list, err); destroy_fsync_dnodes(&tmp_inode_list, err); @@ -914,13 +899,8 @@ skip: * and the f2fs is not read only, check and fix zoned block devices' * write pointer consistency. */ - if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && - f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_fix_curseg_write_pointer(sbi); - if (!err) - err = f2fs_check_write_pointer(sbi); - ret = err; - } + if (!err) + err = f2fs_check_and_fix_write_pointer(sbi); if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4c8836ded90f..c282e8a0a2ec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,16 +192,28 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* + * The vfs inode keeps clean during commit, but the f2fs inode + * doesn't. So clear the dirty state after commit and let + * f2fs_mark_inode_dirty_sync ensure a consistent dirty state. + */ + f2fs_inode_synced(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } @@ -239,7 +251,7 @@ retry: if (!__is_valid_data_blkaddr(new_addr)) { if (new_addr == NULL_ADDR) dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1); f2fs_update_data_blkaddr(&dn, new_addr); } else { f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -248,7 +260,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; @@ -334,8 +346,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode) DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } @@ -366,6 +376,10 @@ out: } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); @@ -400,6 +414,9 @@ int f2fs_commit_atomic_write(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { + if (f2fs_cp_error(sbi)) + return; + if (time_to_inject(sbi, FAULT_CHECKPOINT)) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT); @@ -448,8 +465,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); unsigned int meta = get_pages(sbi, F2FS_DIRTY_META); unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA); - unsigned int threshold = sbi->blocks_per_seg * factor * - DEFAULT_DIRTY_THRESHOLD; + unsigned int threshold = + SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD)); unsigned int global_threshold = threshold * 3 / 2; if (dents >= threshold || qdata >= threshold || @@ -768,8 +785,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, block_t valid_blocks = get_valid_blocks(sbi, segno, true); - f2fs_bug_on(sbi, unlikely(!valid_blocks || - valid_blocks == CAP_BLKS_PER_SEC(sbi))); + f2fs_bug_on(sbi, + (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !valid_blocks) || + valid_blocks == CAP_BLKS_PER_SEC(sbi)); if (!IS_CURSEC(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); @@ -872,7 +891,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); - block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs); struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); block_t holes[2] = {0, 0}; /* DATA and NODE */ block_t unusable; @@ -901,11 +920,16 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); + + if (F2FS_OPTION(sbi).unusable_cap_perc == 100) + return 0; if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; + if (has_not_enough_free_secs(sbi, 0, 0)) + return -EAGAIN; return 0; } @@ -1101,9 +1125,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - printk_ratelimited( - "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, sbi->sb->s_id, + f2fs_info_ratelimited(sbi, + "Issue discard(%u, %u, %u) failed, ret: %d", dc->di.lstart, dc->di.start, dc->di.len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1132,8 +1155,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, struct seg_entry *sentry; unsigned int segno; block_t blk = start; - unsigned long offset, size, max_blocks = sbi->blocks_per_seg; - unsigned long *map; + unsigned long offset, size, *map; while (blk < end) { segno = GET_SEGNO(sbi, blk); @@ -1143,7 +1165,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, if (end < START_BLOCK(sbi, segno + 1)) size = GET_BLKOFF_FROM_SEG0(sbi, end); else - size = max_blocks; + size = BLKS_PER_SEG(sbi); map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); @@ -1277,6 +1299,15 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, } #endif + /* + * stop issuing discard for any of below cases: + * 1. device is conventional zone, but it doesn't support discard. + * 2. device is regulare device, after snapshot it doesn't support + * discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len); lstart = dc->di.lstart; @@ -1971,9 +2002,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { + unsigned int nofs_flags; + int ret; + trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sector, nr_sects, GFP_NOFS); + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects); + memalloc_nofs_restore(nofs_flags); + return ret; } __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); @@ -2042,7 +2079,6 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; @@ -2054,8 +2090,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) || - !f2fs_block_unit_discard(sbi)) + if (se->valid_blocks == BLKS_PER_SEG(sbi) || + !f2fs_hw_support_discard(sbi) || + !f2fs_block_unit_discard(sbi)) return false; if (!force) { @@ -2072,13 +2109,14 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, while (force || SM_I(sbi)->dcc_info->nr_discards <= SM_I(sbi)->dcc_info->max_discards) { - start = __find_rev_next_bit(dmap, max_blocks, end + 1); - if (start >= max_blocks) + start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1); + if (start >= BLKS_PER_SEG(sbi)) break; - end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - if (force && start && end != max_blocks - && (end - start) < cpc->trim_minlen) + end = __find_rev_next_zero_bit(dmap, + BLKS_PER_SEG(sbi), start + 1); + if (force && start && end != BLKS_PER_SEG(sbi) && + (end - start) < cpc->trim_minlen) continue; if (check_only) @@ -2160,8 +2198,8 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, start + 1); if (section_alignment) { - start = rounddown(start, sbi->segs_per_sec); - end = roundup(end, sbi->segs_per_sec); + start = rounddown(start, SEGS_PER_SEC(sbi)); + end = roundup(end, SEGS_PER_SEC(sbi)); } for (i = start; i < end; i++) { @@ -2180,7 +2218,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, if (!f2fs_sb_has_blkzoned(sbi) && (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), - (end - start) << sbi->log_blocks_per_seg); + SEGS_TO_BLKS(sbi, end - start)); continue; } next: @@ -2189,9 +2227,9 @@ next: if (!IS_CURSEC(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), - sbi->segs_per_sec << sbi->log_blocks_per_seg); + BLKS_PER_SEC(sbi)); - start = start_segno + sbi->segs_per_sec; + start = start_segno + SEGS_PER_SEC(sbi); if (start < end) goto next; else @@ -2210,7 +2248,7 @@ next: find_next: if (is_valid) { next_pos = find_next_zero_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || @@ -2222,13 +2260,13 @@ find_next: total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); } skip: cur_pos = next_pos; is_valid = !is_valid; - if (cur_pos < sbi->blocks_per_seg) + if (cur_pos < BLKS_PER_SEG(sbi)) goto find_next; release_discard_addr(entry); @@ -2245,6 +2283,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; int err = 0; + if (f2fs_sb_has_readonly(sbi)) { + f2fs_info(sbi, + "Skip to start discard thread for readonly image"); + return 0; + } + if (!f2fs_realtime_discard_enable(sbi)) return 0; @@ -2277,7 +2321,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) - dcc->discard_granularity = sbi->blocks_per_seg; + dcc->discard_granularity = BLKS_PER_SEG(sbi); else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) dcc->discard_granularity = BLKS_PER_SEC(sbi); @@ -2291,7 +2335,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi)); dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; @@ -2388,76 +2432,38 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +/* + * NOTE: when updating multiple blocks at the same time, please ensure + * that the consecutive input blocks belong to the same segment. + */ +static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) { - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + int i; + int del_count = -del; - segno = GET_SEGNO(sbi, blkaddr); - - se = get_seg_entry(sbi, segno); - new_vblocks = se->valid_blocks + del; - offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - - f2fs_bug_on(sbi, (new_vblocks < 0 || - (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); - - se->valid_blocks = new_vblocks; - - /* Update valid block bitmap */ - if (del > 0) { - exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(exist)) { - f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks--; - del = 0; - } - - if (f2fs_block_unit_discard(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) - sbi->discard_blks--; + f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); - /* - * SSR should never reuse block which is checkpointed - * or newly invalidated. - */ - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { - if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks++; - } - } else { - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); + for (i = 0; i < del_count; i++) { + exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, + mir_exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + blkaddr + i, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", - blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); f2fs_bug_on(sbi, 1); se->valid_blocks++; - del = 0; + del += 1; } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { /* * If checkpoints are off, we must not reuse data that @@ -2465,7 +2471,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) * before, we must track that to know how much space we * really have. */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { spin_lock(&sbi->stat_lock); sbi->unusable_block_count++; spin_unlock(&sbi->stat_lock); @@ -2473,12 +2479,91 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) } if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + f2fs_test_and_clear_bit(offset + i, se->discard_map)) sbi->discard_blks++; + + if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) + se->ckpt_valid_blocks -= 1; } + + return del; +} + +static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) +{ + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif + + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; + } + + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; + return del; +} + +/* + * If releasing blocks, this function supports updating multiple consecutive blocks + * at one time, but please note that these consecutive blocks need to belong to the + * same segment. + */ +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + + segno = GET_SEGNO(sbi, blkaddr); + if (segno == NULL_SEGNO) + return; + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + f2fs_bug_on(sbi, (new_vblocks < 0 || + (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); + + se->valid_blocks = new_vblocks; + + /* Update valid block bitmap */ + if (del > 0) { + del = update_sit_entry_for_alloc(sbi, se, blkaddr, offset, del); + } else { + del = update_sit_entry_for_release(sbi, se, blkaddr, offset, del); + } + __mark_sit_entry_dirty(sbi, segno); /* update total number of valid blocks to be written in ckpt area */ @@ -2488,25 +2573,43 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); + block_t addr_start = addr, addr_end = addr + len - 1; + unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; + unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr); + f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); - update_segment_mtime(sbi, addr, 0); - update_sit_entry(sbi, addr, -1); + if (seg_num == 1) + cnt = len; + else + cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); + do { + update_segment_mtime(sbi, addr_start, 0); + update_sit_entry(sbi, addr_start, -cnt); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + /* update @addr_start and @cnt and @segno */ + addr_start = START_BLOCK(sbi, ++segno); + if (++i == seg_num) + cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; + else + cnt = max_blocks; + } while (i <= seg_num); up_write(&sit_i->sentry_lock); } @@ -2540,7 +2643,7 @@ static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int typ struct curseg_info *curseg = CURSEG_I(sbi, type); if (sbi->ckpt->alloc_type[type] == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); return curseg->next_blkoff; } @@ -2623,12 +2726,12 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, } static int is_next_segment_free(struct f2fs_sb_info *sbi, - struct curseg_info *curseg, int type) + struct curseg_info *curseg) { unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi)) return !test_bit(segno, free_i->free_segmap); return 0; } @@ -2637,54 +2740,76 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG */ -static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir) +static int get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, bool pinning) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); - unsigned int left_start = hint; bool init = true; - int go_left = 0; int i; + int ret = 0; spin_lock(&free_i->segmap_lock); - if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + if (time_to_inject(sbi, FAULT_NO_SEGMENT)) { + ret = -ENOSPC; + goto out_unlock; + } + + if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) { segno = find_next_zero_bit(free_i->free_segmap, GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } + +#ifdef CONFIG_BLK_DEV_ZONED + /* + * If we format f2fs on zoned storage, let's try to get pinned sections + * from beginning of the storage, which should be a conventional one. + */ + if (f2fs_sb_has_blkzoned(sbi)) { + /* Prioritize writing to conventional zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) + segno = 0; + else + segno = max(sbi->first_zoned_segno, *newseg); + hint = GET_SEC_FROM_SEG(sbi, segno); + } +#endif + find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); - if (secno >= MAIN_SECS(sbi)) { - if (dir == ALLOC_RIGHT) { + +#ifdef CONFIG_BLK_DEV_ZONED + if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { + /* Write only to sequential zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { + hint = GET_SEC_FROM_SEG(sbi, sbi->first_zoned_segno); + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + } else secno = find_first_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi)); - f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); - } else { - go_left = 1; - left_start = hint - 1; + MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, 1); + goto out_unlock; } } - if (go_left == 0) - goto skip_left; +#endif - while (test_bit(left_start, free_i->free_secmap)) { - if (left_start > 0) { - left_start--; - continue; - } - left_start = find_first_zero_bit(free_i->free_secmap, + if (secno >= MAIN_SECS(sbi)) { + secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); - f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); - break; + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, 1); + goto out_unlock; + } } - secno = left_start; -skip_left: segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2695,21 +2820,13 @@ skip_left: goto got_it; if (zoneno == old_zoneno) goto got_it; - if (dir == ALLOC_LEFT) { - if (!go_left && zoneno + 1 >= total_zones) - goto got_it; - if (go_left && zoneno == 0) - goto got_it; - } for (i = 0; i < NR_CURSEG_TYPE; i++) if (CURSEG_I(sbi, i)->zone == zoneno) break; if (i < NR_CURSEG_TYPE) { /* zone is in user, try another */ - if (go_left) - hint = zoneno * sbi->secs_per_zone - 1; - else if (zoneno + 1 >= total_zones) + if (zoneno + 1 >= total_zones) hint = 0; else hint = (zoneno + 1) * sbi->secs_per_zone; @@ -2719,9 +2836,21 @@ skip_left: got_it: /* set it as dirty segment in free segmap */ f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); + + /* no free section in conventional zone */ + if (new_sec && pinning && + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + ret = -EAGAIN; + goto out_unlock; + } __set_inuse(sbi, segno); *newseg = segno; +out_unlock: spin_unlock(&free_i->segmap_lock); + + if (ret == -ENOSPC) + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); + return ret; } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -2730,6 +2859,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; unsigned short seg_type = curseg->seg_type; + /* only happen when get_new_segment() fails */ + if (curseg->next_segno == NULL_SEGNO) + return; + curseg->inited = true; curseg->segno = curseg->next_segno; curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); @@ -2754,12 +2887,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); - if (f2fs_need_rand_seg(sbi)) - return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); + if (__is_large_section(sbi)) { + if (f2fs_need_rand_seg(sbi)) { + unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno); - /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (__is_large_section(sbi)) + if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint) + return curseg->segno; + return get_random_u32_inclusive(curseg->segno + 1, + GET_SEG_FROM_SEC(sbi, hint + 1) - 1); + } return curseg->segno; + } else if (f2fs_need_rand_seg(sbi)) { + return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + } /* inmem log may not locate on any segment after mount */ if (!curseg->inited) @@ -2768,8 +2908,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; - if (test_opt(sbi, NOHEAP) && - (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))) + if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) @@ -2786,30 +2925,31 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) * Allocate a current working segment. * This function always allocates a free segment in LFS manner. */ -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned short seg_type = curseg->seg_type; unsigned int segno = curseg->segno; - int dir = ALLOC_LEFT; + bool pinning = type == CURSEG_COLD_DATA_PINNED; + int ret; if (curseg->inited) - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, segno)); - if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) - dir = ALLOC_RIGHT; - - if (test_opt(sbi, NOHEAP)) - dir = ALLOC_RIGHT; + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); segno = __get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec, dir); + ret = get_new_segment(sbi, &segno, new_sec, pinning); + if (ret) { + if (ret == -ENOSPC) + curseg->segno = NULL_SEGNO; + return ret; + } + curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); + return 0; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -2825,7 +2965,7 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start); } static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, @@ -2836,14 +2976,14 @@ static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) { - return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; + return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi); } /* * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type) +static int change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2851,7 +2991,8 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) struct f2fs_summary_block *sum_node; struct page *sum_page; - write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); + if (curseg->inited) + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); __set_test_and_inuse(sbi, new_segno); @@ -2868,21 +3009,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) if (IS_ERR(sum_page)) { /* GC won't be able to use stale summary pages by cp_error */ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); - return; + return PTR_ERR(sum_page); } sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); + return 0; } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age); -static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, +static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, int target_type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); + int ret = 0; curseg->seg_type = target_type; @@ -2890,38 +3033,62 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; - change_curseg(sbi, type); + ret = change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); } stat_inc_seg_type(sbi, curseg); + return ret; } -static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) +static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); + int ret = 0; - if (!sbi->am.atgc_enabled) - return; + if (!sbi->am.atgc_enabled && !force) + return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); - get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); + ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, + CURSEG_COLD_DATA, SSR, 0); up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; +} +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) +{ + return __f2fs_init_atgc_curseg(sbi, false); } -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) + +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi) { - __f2fs_init_atgc_curseg(sbi); + int ret; + + if (!test_opt(sbi, ATGC)) + return 0; + if (sbi->am.atgc_enabled) + return 0; + if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) < + sbi->am.age_threshold) + return 0; + + ret = __f2fs_init_atgc_curseg(sbi, true); + if (!ret) { + sbi->am.atgc_enabled = true; + f2fs_info(sbi, "reenabled age threshold GC"); + } + return ret; } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) @@ -2989,7 +3156,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3016,7 +3184,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3040,8 +3209,7 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && curseg->seg_type == CURSEG_WARM_NODE) return true; - if (curseg->alloc_type == LFS && - is_next_segment_free(sbi, curseg, type) && + if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) @@ -3049,11 +3217,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) return false; } -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); @@ -3064,9 +3233,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type); + ret = change_curseg(sbi, type); else - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); stat_inc_seg_type(sbi, curseg); @@ -3080,45 +3249,85 @@ unlock: mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; } -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; + int err = 0; + + if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited) + goto allocate; if (!force && curseg->inited && !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) - return; + return 0; +allocate: old_segno = curseg->segno; - new_curseg(sbi, type, true); + err = new_curseg(sbi, type, true); + if (err) + return err; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); + return 0; } -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { + int ret; + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type, true, force); + ret = __allocate_new_segment(sbi, type, true, force); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return ret; } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) +{ + int err; + bool gc_required = true; + +retry: + f2fs_lock_op(sbi); + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + + if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { + f2fs_down_write(&sbi->gc_lock); + err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), + true, ZONED_PIN_SEC_REQUIRED_COUNT); + f2fs_up_write(&sbi->gc_lock); + + gc_required = false; + if (!err) + goto retry; + } + + return err; +} + +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + int err = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i, false, false); + err += __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return err; } bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3236,8 +3445,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); if (need_align) { - start_segno = rounddown(start_segno, sbi->segs_per_sec); - end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi)); + end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1; } cpc.reason = CP_DISCARD; @@ -3279,8 +3488,14 @@ out: return err; } -int f2fs_rw_hint_to_seg_type(enum rw_hint hint) +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint) { + if (F2FS_OPTION(sbi).active_logs == 2) + return CURSEG_HOT_DATA; + else if (F2FS_OPTION(sbi).active_logs == 4) + return CURSEG_COLD_DATA; + + /* active_log == 6 */ switch (hint) { case WRITE_LIFE_SHORT: return CURSEG_HOT_DATA; @@ -3291,6 +3506,65 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint) } } +/* + * This returns write hints for each segment type. This hints will be + * passed down to block layer as below by default. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NONE|REQ_META + * HOT_NODE WRITE_LIFE_NONE + * WARM_NODE WRITE_LIFE_MEDIUM + * COLD_NODE WRITE_LIFE_LONG + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * COLD_DATA WRITE_LIFE_EXTREME + * HOT_DATA WRITE_LIFE_SHORT + * WARM_DATA WRITE_LIFE_NOT_SET + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + switch (type) { + case DATA: + switch (temp) { + case WARM: + return WRITE_LIFE_NOT_SET; + case HOT: + return WRITE_LIFE_SHORT; + case COLD: + return WRITE_LIFE_EXTREME; + default: + return WRITE_LIFE_NONE; + } + case NODE: + switch (temp) { + case WARM: + return WRITE_LIFE_MEDIUM; + case HOT: + return WRITE_LIFE_NONE; + case COLD: + return WRITE_LIFE_LONG; + default: + return WRITE_LIFE_NONE; + } + case META: + return WRITE_LIFE_NONE; + default: + return WRITE_LIFE_NONE; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -3345,7 +3619,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && - (fio->sbi->gc_mode != GC_URGENT_HIGH)) + (fio->sbi->gc_mode != GC_URGENT_HIGH) && + __is_valid_data_blkaddr(fio->old_blkaddr) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; @@ -3353,7 +3629,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, fio->page->index); + type = __get_age_segment_type(inode, + page_folio(fio->page)->index); if (type != NO_CHECK_TYPE) return type; @@ -3361,7 +3638,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_cow_file(inode)) return CURSEG_HOT_DATA; - return f2fs_rw_hint_to_seg_type(inode->i_write_hint); + return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); } else { if (IS_DNODE(fio->page)) return is_cold_node(fio->page) ? CURSEG_WARM_NODE : @@ -3370,9 +3648,35 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } } +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + enum temp_type temp = COLD; + + switch (curseg->seg_type) { + case CURSEG_HOT_NODE: + case CURSEG_HOT_DATA: + temp = HOT; + break; + case CURSEG_WARM_NODE: + case CURSEG_WARM_DATA: + temp = WARM; + break; + case CURSEG_COLD_NODE: + case CURSEG_COLD_DATA: + temp = COLD; + break; + default: + f2fs_bug_on(sbi, 1); + } + + return temp; +} + static int __get_segment_type(struct f2fs_io_info *fio) { - int type = 0; + enum log_type type = CURSEG_HOT_DATA; switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: @@ -3388,12 +3692,8 @@ static int __get_segment_type(struct f2fs_io_info *fio) f2fs_bug_on(fio->sbi, true); } - if (IS_HOT(type)) - fio->temp = HOT; - else if (IS_WARM(type)) - fio->temp = WARM; - else - fio->temp = COLD; + fio->temp = f2fs_get_segment_temp(fio->sbi, type); + return type; } @@ -3410,7 +3710,14 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +static void reset_curseg_fields(struct curseg_info *curseg) +{ + curseg->inited = false; + curseg->segno = NULL_SEGNO; + curseg->next_segno = 0; +} + +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3421,12 +3728,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; bool segment_full = false; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); + if (curseg->segno == NULL_SEGNO) { + ret = -ENOSPC; + goto out_err; + } + if (from_gc) { f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); @@ -3435,7 +3748,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); + f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi)); f2fs_wait_discard_bio(sbi, *new_blkaddr); @@ -3464,25 +3777,37 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * since SSR needs latest valid block information. */ update_sit_entry(sbi, *new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - update_sit_entry(sbi, old_blkaddr, -1); + update_sit_entry(sbi, old_blkaddr, -1); /* * If the current segment is full, flush it out and replace it with a * new segment. */ if (segment_full) { + if (type == CURSEG_COLD_DATA_PINNED && + !((curseg->segno + 1) % sbi->segs_per_sec)) { + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + reset_curseg_fields(curseg); + goto skip_new_segment; + } + if (from_gc) { - get_atssr_segment(sbi, type, se->type, + ret = get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); } else { if (need_new_seg(sbi, type)) - new_curseg(sbi, type, false); + ret = new_curseg(sbi, type, false); else - change_curseg(sbi, type); + ret = change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } + + if (ret) + goto out_err; } + +skip_new_segment: /* * segment dirty status should be updated after segment allocation, * so we just need to update status only one time after previous @@ -3491,12 +3816,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - if (IS_DATASEG(type)) + if (IS_DATASEG(curseg->seg_type)) atomic64_inc(&sbi->allocated_data_blocks); up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) { + if (page && IS_NODESEG(curseg->seg_type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); f2fs_inode_chksum_set(sbi, page); @@ -3505,9 +3830,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (fio) { struct f2fs_bio_info *io; - if (F2FS_IO_ALIGNED(sbi)) - fio->retry = 0; - INIT_LIST_HEAD(&fio->list); fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; @@ -3517,8 +3839,15 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + f2fs_up_read(&SM_I(sbi)->curseg_lock); + return 0; +out_err: + *new_blkaddr = NULL_ADDR; + up_write(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3548,33 +3877,61 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, } } +static int log_type_to_seg_type(enum log_type type) +{ + int seg_type = CURSEG_COLD_DATA; + + switch (type) { + case CURSEG_HOT_DATA: + case CURSEG_WARM_DATA: + case CURSEG_COLD_DATA: + case CURSEG_HOT_NODE: + case CURSEG_WARM_NODE: + case CURSEG_COLD_NODE: + seg_type = (int)type; + break; + case CURSEG_COLD_DATA_PINNED: + case CURSEG_ALL_DATA_ATGC: + seg_type = CURSEG_COLD_DATA; + break; + default: + break; + } + return seg_type; +} + static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio); - bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); + enum log_type type = __get_segment_type(fio); + int seg_type = log_type_to_seg_type(type); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && + seg_type == CURSEG_COLD_DATA); if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); -reallocate: - f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio); + + if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + &fio->new_blkaddr, sum, type, fio)) { + if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host)) + fscrypt_finalize_bounce_page(&fio->encrypted_page); + end_page_writeback(fio->page); + if (f2fs_in_warm_node_list(fio->sbi, fio->page)) + f2fs_del_fsync_node_entry(fio->sbi, fio->page); + goto out; + } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr); + f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); - if (fio->retry) { - fio->old_blkaddr = fio->new_blkaddr; - goto reallocate; - } f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - +out: if (keep_order) f2fs_up_read(&fio->sbi->io_order_lock); } -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -3583,20 +3940,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, - .old_blkaddr = page->index, - .new_blkaddr = page->index, - .page = page, + .old_blkaddr = folio->index, + .new_blkaddr = folio->index, + .page = folio_page(folio, 0), .encrypted_page = NULL, .in_list = 0, }; - if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_submit_page_write(&fio); - stat_inc_meta_count(sbi, page->index); + stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } @@ -3652,9 +4009,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - if (fio->post_read) - invalidate_mapping_pages(META_MAPPING(sbi), - fio->new_blkaddr, fio->new_blkaddr); + if (fio->meta_gc) + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -3730,8 +4086,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } } - f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); + f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type)); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -3743,7 +4099,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -3755,7 +4112,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); @@ -3769,12 +4126,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; } +out_unlock: up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_write(&SM_I(sbi)->curseg_lock); @@ -3798,7 +4157,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked) { - if (PageWriteback(page)) { + if (folio_test_writeback(page_folio(page))) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); /* submit cached LFS IO */ @@ -3807,7 +4166,8 @@ void f2fs_wait_on_page_writeback(struct page *page, f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); - f2fs_bug_on(sbi, locked && PageWriteback(page)); + f2fs_bug_on(sbi, locked && + folio_test_writeback(page_folio(page))); } else { wait_for_stable_page(page); } @@ -3819,7 +4179,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; if (!__is_valid_data_blkaddr(blkaddr)) @@ -3838,13 +4198,13 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) @@ -3886,7 +4246,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i->next_blkoff = blk_off; if (seg_i->alloc_type == SSR) - blk_off = sbi->blocks_per_seg; + blk_off = BLKS_PER_SEG(sbi); for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; @@ -3954,7 +4314,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) struct f2fs_summary *ns = &sum->entries[0]; int i; - for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; } @@ -4460,7 +4820,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) #endif sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); - sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs); sit_i->written_valid_blocks = 0; sit_i->bitmap_size = sit_bitmap_size; sit_i->dirty_sentries = 0; @@ -4527,15 +4887,8 @@ static int build_curseg(struct f2fs_sb_info *sbi) sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; - if (i < NR_PERSISTENT_LOG) - array[i].seg_type = CURSEG_HOT_DATA + i; - else if (i == CURSEG_COLD_DATA_PINNED) - array[i].seg_type = CURSEG_COLD_DATA; - else if (i == CURSEG_ALL_DATA_ATGC) - array[i].seg_type = CURSEG_COLD_DATA; - array[i].segno = NULL_SEGNO; - array[i].next_blkoff = 0; - array[i].inited = false; + array[i].seg_type = log_type_to_seg_type(i); + reset_curseg_fields(&array[i]); } return restore_curseg_summaries(sbi); } @@ -4587,21 +4940,20 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; - if (f2fs_block_unit_discard(sbi)) { - /* build discard map only one time */ - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, + if (!f2fs_block_unit_discard(sbi)) + goto init_discard_map_done; + + /* build discard map only one time */ + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, + goto init_discard_map_done; + } + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - + sbi->discard_blks += BLKS_PER_SEG(sbi) - se->valid_blocks; - } - } - +init_discard_map_done: if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; @@ -4741,7 +5093,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) return; mutex_lock(&dirty_i->seglist_lock); - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); @@ -4840,7 +5192,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (curseg->alloc_type == SSR) continue; - for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) { if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: @@ -4856,7 +5208,6 @@ out: } #ifdef CONFIG_BLK_DEV_ZONED - static int check_zone_write_pointer(struct f2fs_sb_info *sbi, struct f2fs_dev_info *fdev, struct blk_zone *zone) @@ -4865,6 +5216,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, block_t zone_block, valid_block_cnt; unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; int ret; + unsigned int nofs_flags; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; @@ -4876,14 +5228,19 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * Skip check of zones cursegs point to, since * fix_curseg_write_pointer() checks them. */ - if (zone_segno >= MAIN_SEGS(sbi) || - IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) + if (zone_segno >= MAIN_SEGS(sbi)) return 0; /* * Get # of valid block of the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, + blk_zone_cond_str(zone->cond)); + return 0; + } if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) || (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL)) @@ -4891,8 +5248,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, if (!valid_block_cnt) { f2fs_notice(sbi, "Zone without valid block has non-zero write " - "pointer. Reset the write pointer: cond[0x%x]", - zone->cond); + "pointer. Reset the write pointer: cond[%s]", + blk_zone_cond_str(zone->cond)); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); if (ret) @@ -4909,11 +5266,13 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * selected for write operation until it get discarded. */ f2fs_notice(sbi, "Valid blocks are not aligned with write " - "pointer: valid block[0x%x,0x%x] cond[0x%x]", - zone_segno, valid_block_cnt, zone->cond); + "pointer: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); + nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, - zone->start, zone->len, GFP_NOFS); + zone->start, zone->len); + memalloc_nofs_restore(nofs_flags); if (ret == -EOPNOTSUPP) { ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, zone->len - (zone->wp - zone->start), @@ -4952,7 +5311,7 @@ static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, return 0; } -static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) +static int do_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) { struct curseg_info *cs = CURSEG_I(sbi, type); struct f2fs_dev_info *zbd; @@ -5004,7 +5363,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) } /* Allocate a new section if it's not new. */ - if (cs->next_blkoff) { + if (cs->next_blkoff || + cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) { unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff; f2fs_allocate_new_section(sbi, type, true); @@ -5056,12 +5416,12 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; } -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) +static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; for (i = 0; i < NR_PERSISTENT_LOG; i++) { - ret = fix_curseg_write_pointer(sbi, i); + ret = do_fix_curseg_write_pointer(sbi, i); if (ret) return ret; } @@ -5084,7 +5444,7 @@ static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, return check_zone_write_pointer(args->sbi, args->fdev, zone); } -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +static int check_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; struct check_zone_write_pointer_args args; @@ -5104,6 +5464,21 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) return 0; } +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) +{ + int ret; + + if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) || + f2fs_hw_is_readonly(sbi)) + return 0; + + f2fs_notice(sbi, "Checking entire write pointers"); + ret = fix_curseg_write_pointer(sbi); + if (!ret) + ret = check_write_pointer(sbi); + return ret; +} + /* * Return the number of usable blocks in a segment. The number of blocks * returned is always equal to the number of blocks in a segment for @@ -5119,7 +5494,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( unsigned int secno; if (!sbi->unusable_blocks_per_sec) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); secno = GET_SEC_FROM_SEG(sbi, segno); seg_start = START_BLOCK(sbi, segno); @@ -5134,18 +5509,13 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( */ if (seg_start >= sec_cap_blkaddr) return 0; - if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) + if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr) return sec_cap_blkaddr - seg_start; - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } #else -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) -{ - return 0; -} - -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { return 0; } @@ -5163,16 +5533,50 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi)) return f2fs_usable_zone_blks_in_seg(sbi, segno); - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); - return sbi->segs_per_sec; + return SEGS_PER_SEC(sbi); +} + +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); + unsigned int secno = 0, start = 0; + unsigned int total_valid_blocks = 0; + unsigned long long mtime = 0; + unsigned int i = 0; + + secno = GET_SEC_FROM_SEG(sbi, segno); + start = GET_SEG_FROM_SEC(sbi, secno); + + if (!__is_large_section(sbi)) { + mtime = get_seg_entry(sbi, start + i)->mtime; + goto out; + } + + for (i = 0; i < usable_segs_per_sec; i++) { + /* for large section, only check the mtime of valid segments */ + struct seg_entry *se = get_seg_entry(sbi, start+i); + + mtime += se->mtime * se->valid_blocks; + total_valid_blocks += se->valid_blocks; + } + + if (total_valid_blocks == 0) + return INVALID_MTIME; + + mtime = div_u64(mtime, total_valid_blocks); +out: + if (unlikely(mtime == INVALID_MTIME)) + mtime -= 1; + return mtime; } /* @@ -5187,14 +5591,10 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = ULLONG_MAX; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - unsigned int i; + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { unsigned long long mtime = 0; - for (i = 0; i < sbi->segs_per_sec; i++) - mtime += get_seg_entry(sbi, segno + i)->mtime; - - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = f2fs_get_section_mtime(sbi, segno); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; @@ -5233,7 +5633,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC); sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - sm_info->min_seq_blocks = sbi->blocks_per_seg; + sm_info->min_seq_blocks = BLKS_PER_SEG(sbi); sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 8129be788bd5..943be4f1d6d2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -18,6 +18,8 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ #define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */ +#define INVALID_MTIME ULLONG_MAX /* no valid blocks in a segment/section */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) @@ -32,10 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) -#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) -#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) - #define IS_CURSEG(sbi, seg) \ (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ @@ -48,21 +46,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define IS_CURSEC(sbi, secno) \ (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - (sbi)->segs_per_sec)) + SEGS_PER_SEC(sbi))) #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ @@ -77,40 +75,37 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define TOTAL_SEGS(sbi) \ (SM_I(sbi) ? SM_I(sbi)->segment_count : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi))) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) #define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) + (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno)))) #define NEXT_FREE_BLKADDR(sbi, curseg) \ (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) + (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr))) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1)) #define GET_SEGNO(sbi, blk_addr) \ ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) #define CAP_BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ - (sbi)->unusable_blocks_per_sec) + (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec) #define CAP_SEGS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\ - (sbi)->log_blocks_per_seg)) + (SEGS_PER_SEC(sbi) - \ + BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec)) #define GET_SEC_FROM_SEG(sbi, segno) \ - (((segno) == -1) ? -1 : (segno) / (sbi)->segs_per_sec) + (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi)) #define GET_SEG_FROM_SEC(sbi, secno) \ - ((secno) * (sbi)->segs_per_sec) + ((secno) * SEGS_PER_SEC(sbi)) #define GET_ZONE_FROM_SEC(sbi, secno) \ (((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone) #define GET_ZONE_FROM_SEG(sbi, segno) \ @@ -139,16 +134,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) /* - * indicate a block allocation direction: RIGHT and LEFT. - * RIGHT means allocating new sections towards the end of volume. - * LEFT means the opposite direction. - */ -enum { - ALLOC_RIGHT = 0, - ALLOC_LEFT -}; - -/* * In the victim_sel_policy->alloc_mode, there are three block allocation modes. * LFS writes data sequentially with cleaning operations. * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. @@ -201,6 +186,7 @@ struct victim_sel_policy { unsigned int min_segno; /* segment # having min. cost */ unsigned long long age; /* mtime of GCed section*/ unsigned long long age_threshold;/* age threshold */ + bool one_time_gc; /* one time GC */ }; struct seg_entry { @@ -360,11 +346,12 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { if (use_section && __is_large_section(sbi)) { - unsigned int start_segno = START_SEGNO(segno); + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int blocks = 0; int i; - for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) { + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { struct seg_entry *se = get_seg_entry(sbi, start_segno); blocks += se->ckpt_valid_blocks; @@ -442,14 +429,14 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); + start_segno + SEGS_PER_SEC(sbi), start_segno); if (next >= start_segno + usable_segs) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; @@ -476,7 +463,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { @@ -485,7 +472,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, if (!inmem && IS_CURSEC(sbi, secno)) goto skip_free; next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); + start_segno + SEGS_PER_SEC(sbi), start_segno); if (next >= start_segno + usable_segs) { if (test_and_clear_bit(secno, free_i->free_secmap)) free_i->free_sections++; @@ -535,8 +522,7 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments + - SM_I(sbi)->additional_reserved_segments; + return SM_I(sbi)->reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) @@ -570,34 +556,37 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) } static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, - unsigned int node_blocks, unsigned int dent_blocks) + unsigned int node_blocks, unsigned int data_blocks, + unsigned int dent_blocks) { - unsigned int segno, left_blocks; + unsigned int segno, left_blocks, blocks; int i; - /* check current node segment */ - for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + /* check current data/node sections in the worst case. */ + for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); - if (node_blocks > left_blocks) + blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks; + if (blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (dent_blocks > left_blocks) return false; return true; } /* - * calculate needed sections for dirty node/dentry - * and call has_curseg_enough_space + * calculate needed sections for dirty node/dentry and call + * has_curseg_enough_space, please note that, it needs to account + * dirty data as well in lfs mode when checkpoint is disabled. */ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) @@ -606,19 +595,30 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int total_data_blocks = 0; unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int data_secs = 0; unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); + unsigned int data_blocks = 0; + + if (f2fs_lfs_mode(sbi) && + unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); + data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); + data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); + } if (lower_p) - *lower_p = node_secs + dent_secs; + *lower_p = node_secs + dent_secs + data_secs; if (upper_p) *upper_p = node_secs + dent_secs + - (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + + (data_blocks ? 1 : 0); if (curseg_p) *curseg_p = has_curseg_enough_space(sbi, - node_blocks, dent_blocks); + node_blocks, data_blocks, dent_blocks); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, @@ -638,7 +638,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } @@ -649,12 +649,30 @@ static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, return !has_not_enough_free_secs(sbi, freed, needed); } +static inline bool has_enough_free_blks(struct f2fs_sb_info *sbi) +{ + unsigned int total_free_blocks = 0; + unsigned int avail_user_block_count; + + spin_lock(&sbi->stat_lock); + + avail_user_block_count = get_available_block_count(sbi, NULL, true); + total_free_blocks = avail_user_block_count - (unsigned int)valid_user_blocks(sbi); + + spin_unlock(&sbi->stat_lock); + + return total_free_blocks > 0; +} + static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (likely(has_enough_free_secs(sbi, 0, 0))) return true; + if (!f2fs_lfs_mode(sbi) && + likely(has_enough_free_blks(sbi))) + return true; return false; } @@ -793,10 +811,10 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - if (usable_blks_per_seg < sbi->blocks_per_seg) + if (usable_blks_per_seg < BLKS_PER_SEG(sbi)) f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map, - sbi->blocks_per_seg, - usable_blks_per_seg) != sbi->blocks_per_seg); + BLKS_PER_SEG(sbi), + usable_blks_per_seg) != BLKS_PER_SEG(sbi)); /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg @@ -915,9 +933,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) return 0; if (type == DATA) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (type == NODE) - return 8 * sbi->blocks_per_seg; + return SEGS_TO_BLKS(sbi, 8); else if (type == META) return 8 * BIO_MAX_VECS; else diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d45ab0992ae5..19b67828ae32 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -11,7 +11,6 @@ #include <linux/fs_context.h> #include <linux/sched/mm.h> #include <linux/statfs.h> -#include <linux/buffer_head.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/mount.h> @@ -44,41 +43,53 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION const char *f2fs_fault_name[FAULT_MAX] = { - [FAULT_KMALLOC] = "kmalloc", - [FAULT_KVMALLOC] = "kvmalloc", - [FAULT_PAGE_ALLOC] = "page alloc", - [FAULT_PAGE_GET] = "page get", - [FAULT_ALLOC_NID] = "alloc nid", - [FAULT_ORPHAN] = "orphan", - [FAULT_BLOCK] = "no more block", - [FAULT_DIR_DEPTH] = "too big dir depth", - [FAULT_EVICT_INODE] = "evict_inode fail", - [FAULT_TRUNCATE] = "truncate fail", - [FAULT_READ_IO] = "read IO error", - [FAULT_CHECKPOINT] = "checkpoint error", - [FAULT_DISCARD] = "discard error", - [FAULT_WRITE_IO] = "write IO error", - [FAULT_SLAB_ALLOC] = "slab alloc", - [FAULT_DQUOT_INIT] = "dquot initialize", - [FAULT_LOCK_OP] = "lock_op", - [FAULT_BLKADDR] = "invalid blkaddr", + [FAULT_KMALLOC] = "kmalloc", + [FAULT_KVMALLOC] = "kvmalloc", + [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_NID] = "alloc nid", + [FAULT_ORPHAN] = "orphan", + [FAULT_BLOCK] = "no more block", + [FAULT_DIR_DEPTH] = "too big dir depth", + [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", + [FAULT_READ_IO] = "read IO error", + [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", + [FAULT_SLAB_ALLOC] = "slab alloc", + [FAULT_DQUOT_INIT] = "dquot initialize", + [FAULT_LOCK_OP] = "lock_op", + [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr", + [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr", + [FAULT_NO_SEGMENT] = "no free segment", }; -void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type) +int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { + if (rate > INT_MAX) + return -EINVAL; atomic_set(&ffi->inject_ops, 0); - ffi->inject_rate = rate; + ffi->inject_rate = (int)rate; } - if (type) - ffi->inject_type = type; + if (type) { + if (type >= BIT(FAULT_MAX)) + return -EINVAL; + ffi->inject_type = (unsigned int)type; + } if (!rate && !type) memset(ffi, 0, sizeof(struct f2fs_fault_info)); + else + f2fs_info(sbi, + "build fault injection attr: rate: %lu, type: 0x%lx", + rate, type); + return 0; } #endif @@ -137,7 +148,6 @@ enum { Opt_resgid, Opt_resuid, Opt_mode, - Opt_io_size_bits, Opt_fault_injection, Opt_fault_type, Opt_lazytime, @@ -216,7 +226,6 @@ static match_table_t f2fs_tokens = { {Opt_resgid, "resgid=%u"}, {Opt_resuid, "resuid=%u"}, {Opt_mode, "mode=%s"}, - {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_fault_type, "fault_type=%u"}, {Opt_lazytime, "lazytime"}, @@ -263,7 +272,8 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -274,8 +284,12 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (limit_rate) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); va_end(args); } @@ -306,7 +320,7 @@ struct kmem_cache *f2fs_cf_name_slab; static int __init f2fs_create_casefold_cache(void) { f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name", - F2FS_NAME_LEN); + F2FS_NAME_LEN); return f2fs_cf_name_slab ? 0 : -ENOMEM; } @@ -343,46 +357,6 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } -static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) -{ - unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; - unsigned int avg_vblocks; - unsigned int wanted_reserved_segments; - block_t avail_user_block_count; - - if (!F2FS_IO_ALIGNED(sbi)) - return 0; - - /* average valid block count in section in worst case */ - avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); - - /* - * we need enough free space when migrating one section in worst case - */ - wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * - reserved_segments(sbi); - wanted_reserved_segments -= reserved_segments(sbi); - - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks - - F2FS_OPTION(sbi).root_reserved_blocks; - - if (wanted_reserved_segments * sbi->blocks_per_seg > - avail_user_block_count) { - f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", - wanted_reserved_segments, - avail_user_block_count >> sbi->log_blocks_per_seg); - return -ENOSPC; - } - - SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; - - f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", - wanted_reserved_segments); - - return 0; -} - static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -663,7 +637,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) #ifdef CONFIG_F2FS_FS_ZSTD static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { @@ -677,9 +651,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(sbi, "do not support negative compress level: %d", level); + return -ERANGE; + } + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; @@ -730,6 +710,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (!strcmp(name, "off")) { + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "zoned devices need bggc"); + kfree(name); + return -EINVAL; + } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; @@ -763,10 +748,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) clear_opt(sbi, DISCARD); break; case Opt_noheap: - set_opt(sbi, NOHEAP); - break; case Opt_heap: - clear_opt(sbi, NOHEAP); + f2fs_warn(sbi, "heap/no_heap options were deprecated"); break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: @@ -855,6 +838,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: + if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: @@ -913,28 +900,21 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; - case Opt_io_size_bits: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { - f2fs_warn(sbi, "Not support %ld, larger than %d", - BIT(arg), BIO_MAX_VECS); - return -EINVAL; - } - F2FS_OPTION(sbi).write_io_size_bits = arg; - break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; - f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); + if (f2fs_build_fault_attr(sbi, arg, + F2FS_ALL_FAULT_TYPE)) + return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; - f2fs_build_fault_attr(sbi, 0, arg); + if (f2fs_build_fault_attr(sbi, 0, arg)) + return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; #else @@ -1192,7 +1172,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; } - strcpy(ext[ext_cnt], name); + ret = strscpy(ext[ext_cnt], name); + if (ret < 0) { + kfree(name); + return ret; + } F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; @@ -1221,7 +1205,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; } - strcpy(noext[noext_cnt], name); + ret = strscpy(noext[noext_cnt], name); + if (ret < 0) { + kfree(name); + return ret; + } F2FS_OPTION(sbi).nocompress_ext_cnt++; kfree(name); break; @@ -1354,13 +1342,13 @@ default_check: return -EINVAL; } #endif -#if !IS_ENABLED(CONFIG_UNICODE) - if (f2fs_sb_has_casefold(sbi)) { + + if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); return -EINVAL; } -#endif + /* * The BLKZONED feature indicates that the drive was formatted with * zone alignment optimization. This is optional for host-aware @@ -1392,12 +1380,6 @@ default_check: } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); - return -EINVAL; - } - if (test_opt(sbi, INLINE_XATTR_SIZE)) { int min_size, max_size; @@ -1605,7 +1587,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { if (i > 0) - bdev_release(FDEV(i).bdev_handle); + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif @@ -1712,13 +1694,10 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->ckpt); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); f2fs_destroy_page_array_cache(sbi); f2fs_destroy_xattr_caches(sbi); - mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); @@ -1779,6 +1758,18 @@ static int f2fs_freeze(struct super_block *sb) static int f2fs_unfreeze(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + /* + * It will update discard_max_bytes of mounted lvm device to zero + * after creating snapshot on this lvm device, let's drop all + * remained discards. + * We don't need to disable real-time discard because discard_max_bytes + * will recover after removal of snapshot. + */ + if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi)) + f2fs_issue_discard_timeout(sbi); + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -2009,10 +2000,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } else { seq_puts(seq, ",nodiscard"); } - if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap"); - else - seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -2078,9 +2065,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) F2FS_OPTION(sbi).s_resuid), from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); - if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_bits=%u", - F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", @@ -2187,12 +2171,9 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE; - sbi->sb->s_flags &= ~SB_INLINECRYPT; - set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, NOHEAP); set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; @@ -2247,6 +2228,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) .init_gc_type = FG_GC, .should_migrate_blocks = false, .err_gc_skipped = true, + .no_bg_gc = true, .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); @@ -2332,7 +2314,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); - bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); @@ -2382,6 +2363,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (err) goto restore_opts; +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + err = -EINVAL; + goto restore_opts; + } +#endif + /* flush outstanding errors before changing fs state */ flush_work(&sbi->s_error_work); @@ -2440,12 +2432,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "switch io_bits option is not allowed"); - goto restore_opts; - } - if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch compress_cache option is not allowed"); @@ -2520,6 +2506,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } + adjust_unusable_cap_perc(sbi); if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) { if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); @@ -2564,7 +2551,6 @@ skip: (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); - adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; restore_checkpoint: @@ -2609,6 +2595,11 @@ restore_opts: return err; } +static void f2fs_shutdown(struct super_block *sb) +{ + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); +} + #ifdef CONFIG_QUOTA static bool f2fs_need_recovery(struct f2fs_sb_info *sbi) { @@ -2722,7 +2713,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, const struct address_space_operations *a_ops = mapping->a_ops; int offset = off & (sb->s_blocksize - 1); size_t towrite = len; - struct page *page; + struct folio *folio; void *fsdata = NULL; int err = 0; int tocopy; @@ -2732,7 +2723,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, - &page, &fsdata); + &folio, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); @@ -2742,10 +2733,10 @@ retry: break; } - memcpy_to_page(page, offset, data, tocopy); + memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, fsdata); + folio, fsdata); offset = 0; towrite -= tocopy; off += tocopy; @@ -2768,7 +2759,7 @@ int f2fs_dquot_initialize(struct inode *inode) return dquot_initialize(inode); } -static struct dquot **f2fs_get_dquots(struct inode *inode) +static struct dquot __rcu **f2fs_get_dquots(struct inode *inode) { return F2FS_I(inode)->i_dquot; } @@ -3208,6 +3199,7 @@ static const struct super_operations f2fs_sops = { .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, .remount_fs = f2fs_remount, + .shutdown = f2fs_shutdown, }; #ifdef CONFIG_FS_ENCRYPTION @@ -3362,29 +3354,47 @@ loff_t max_file_blocks(struct inode *inode) * fit within U32_MAX + 1 data units. */ - result = min(result, (((loff_t)U32_MAX + 1) * 4096) >> F2FS_BLKSIZE_BITS); + result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); return result; } -static int __f2fs_commit_super(struct buffer_head *bh, - struct f2fs_super_block *super) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, + pgoff_t index, bool update) { - lock_buffer(bh); - if (super) - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_dirty(bh); - unlock_buffer(bh); - + struct bio *bio; /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA; + int ret; + + folio_lock(folio); + folio_wait_writeback(folio); + if (update) + memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi), + sizeof(struct f2fs_super_block)); + folio_mark_dirty(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); + + bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); + + /* it doesn't need to set crypto context for superblock update */ + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio)); + + if (!bio_add_folio(bio, folio, folio_size(folio), 0)) + f2fs_bug_on(sbi, 1); + + ret = submit_bio_wait(bio); + folio_end_writeback(folio); + + return ret; } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); struct super_block *sb = sbi->sb; u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); @@ -3400,9 +3410,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", @@ -3459,7 +3469,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { - err = __f2fs_commit_super(bh, NULL); + err = __f2fs_commit_super(sbi, folio, index, false); res = err ? "failed" : "done"; } f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)", @@ -3472,12 +3482,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, } static int sanity_check_raw_super(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main; block_t total_sections, blocks_per_seg; - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); size_t crc_offset = 0; __u32 crc = 0; @@ -3503,7 +3512,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } } - /* Currently, support only 4KB block size */ + /* only support block_size equals to PAGE_SIZE */ if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", le32_to_cpu(raw_super->log_blocksize), @@ -3635,7 +3644,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ - if (sanity_check_area_boundary(sbi, bh)) + if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; return 0; @@ -3706,7 +3715,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } main_segs = le32_to_cpu(raw_super->segment_count_main); - blocks_per_seg = sbi->blocks_per_seg; + blocks_per_seg = BLKS_PER_SEG(sbi); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || @@ -3818,9 +3827,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); sbi->total_sections = le32_to_cpu(raw_super->section_count); - sbi->total_node_count = - (le32_to_cpu(raw_super->segment_count_nat) / 2) - * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->total_node_count = SEGS_TO_BLKS(sbi, + ((le32_to_cpu(raw_super->segment_count_nat) / 2) * + NAT_ENTRY_PER_BLOCK)); F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); @@ -3829,7 +3838,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; - sbi->migration_granularity = sbi->segs_per_sec; + sbi->migration_granularity = SEGS_PER_SEC(sbi); + sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ? + DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; @@ -3924,17 +3935,25 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; u64 zone_sectors; + unsigned int max_open_zones; int ret; if (!f2fs_sb_has_blkzoned(sbi)) return 0; - zone_sectors = bdev_zone_sectors(bdev); - if (!is_power_of_2(zone_sectors)) { - f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n"); - return -EINVAL; + if (bdev_is_zoned(FDEV(devi).bdev)) { + max_open_zones = bdev_max_open_zones(bdev); + if (max_open_zones && (max_open_zones < sbi->max_open_zones)) + sbi->max_open_zones = max_open_zones; + if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } } + zone_sectors = bdev_zone_sectors(bdev); if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != SECTOR_TO_BLOCK(zone_sectors)) return -EINVAL; @@ -3974,7 +3993,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, { struct super_block *sb = sbi->sb; int block; - struct buffer_head *bh; + struct folio *folio; struct f2fs_super_block *super; int err = 0; @@ -3983,32 +4002,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, return -ENOMEM; for (block = 0; block < 2; block++) { - bh = sb_bread(sb, block); - if (!bh) { + folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL); + if (IS_ERR(folio)) { f2fs_err(sbi, "Unable to read %dth superblock", block + 1); - err = -EIO; + err = PTR_ERR(folio); *recovery = 1; continue; } /* sanity checking of raw super */ - err = sanity_check_raw_super(sbi, bh); + err = sanity_check_raw_super(sbi, folio, block); if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - brelse(bh); + folio_put(folio); *recovery = 1; continue; } if (!*raw_super) { - memcpy(super, bh->b_data + F2FS_SUPER_OFFSET, + memcpy(super, F2FS_SUPER_BLOCK(folio, block), sizeof(*super)); *valid_super_block = block; *raw_super = super; } - brelse(bh); + folio_put(folio); } /* No valid superblock */ @@ -4022,7 +4041,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { - struct buffer_head *bh; + struct folio *folio; + pgoff_t index; __u32 crc = 0; int err; @@ -4040,22 +4060,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block ? 0 : 1; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; /* write current valid superblock */ - bh = sb_bread(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); return err; } @@ -4090,7 +4112,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) f2fs_up_write(&sbi->sb_lock); if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err); + f2fs_err_ratelimited(sbi, + "f2fs_commit_super fails to record stop_reason, err:%d", + err); } void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) @@ -4133,8 +4157,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error) err = f2fs_commit_super(sbi, false); if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d", - error, err); + f2fs_err_ratelimited(sbi, + "f2fs_commit_super fails to record errors:%u, err:%d", + error, err); out_unlock: f2fs_up_write(&sbi->sb_lock); } @@ -4162,8 +4187,7 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context) +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) { struct super_block *sb = sbi->sb; bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; @@ -4175,10 +4199,12 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (!f2fs_hw_is_readonly(sbi)) { save_stop_reason(sbi, reason); - if (irq_context && !shutdown) - schedule_work(&sbi->s_error_work); - else - f2fs_record_stop_reason(sbi); + /* + * always create an asynchronous task to record stop_reason + * in order to avoid potential deadlock when running into + * f2fs_record_stop_reason() synchronously. + */ + schedule_work(&sbi->s_error_work); } /* @@ -4195,17 +4221,25 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (shutdown) set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - /* continue filesystem operators if errors=continue */ - if (continue_fs || f2fs_readonly(sb)) + /* + * Continue filesystem operators if errors=continue. Should not set + * RO by shutdown, since RO bypasses thaw_super which can hang the + * system. + */ + if (continue_fs || f2fs_readonly(sb) || shutdown) { + f2fs_warn(sbi, "Stopped filesystem due to reason: %d", reason); return; + } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) @@ -4216,6 +4250,16 @@ static void f2fs_record_error_work(struct work_struct *work) f2fs_record_stop_reason(sbi); } +static inline unsigned int get_first_zoned_segno(struct f2fs_sb_info *sbi) +{ + int devi; + + for (devi = 0; devi < sbi->s_ndevs; devi++) + if (bdev_is_zoned(FDEV(devi).bdev)) + return GET_SEGNO(sbi, FDEV(devi).start_blk); + return 0; +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -4244,10 +4288,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev); sbi->aligned_blksize = true; +#ifdef CONFIG_BLK_DEV_ZONED + sbi->max_open_zones = UINT_MAX; + sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ; +#endif for (i = 0; i < max_devices; i++) { if (i == 0) - FDEV(0).bdev_handle = sbi->sb->s_bdev_handle; + FDEV(0).bdev_file = sbi->sb->s_bdev_file; else if (!RDEV(i).path[0]) break; @@ -4259,22 +4307,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (i == 0) { FDEV(i).start_blk = 0; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1 + - le32_to_cpu(raw_super->segment0_blkaddr); + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1 + + le32_to_cpu(raw_super->segment0_blkaddr); } else { FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1; - FDEV(i).bdev_handle = bdev_open_by_path( + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1; + FDEV(i).bdev_file = bdev_file_open_by_path( FDEV(i).path, mode, sbi->sb, NULL); } } - if (IS_ERR(FDEV(i).bdev_handle)) - return PTR_ERR(FDEV(i).bdev_handle); + if (IS_ERR(FDEV(i).bdev_file)) + return PTR_ERR(FDEV(i).bdev_file); - FDEV(i).bdev = FDEV(i).bdev_handle->bdev; + FDEV(i).bdev = file_bdev(FDEV(i).bdev_file); /* to release errored devices */ sbi->s_ndevs = i + 1; @@ -4305,8 +4353,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).total_segments, FDEV(i).start_blk, FDEV(i).end_blk); } - f2fs_info(sbi, - "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -4418,15 +4464,6 @@ try_onemore: } mutex_init(&sbi->flush_lock); - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_err(sbi, "Cannot load crc32 driver."); - err = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto free_sbi; - } - /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_err(sbi, "unable to set blocksize"); @@ -4496,7 +4533,8 @@ try_onemore: sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); - memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid)); + super_set_sysfs_name_bdev(sb); sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ @@ -4519,19 +4557,10 @@ try_onemore: if (err) goto free_iostat; - if (F2FS_IO_ALIGNED(sbi)) { - sbi->write_io_dummy = - mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); - if (!sbi->write_io_dummy) { - err = -ENOMEM; - goto free_percpu; - } - } - /* init per sbi slab cache */ err = f2fs_init_xattr_caches(sbi); if (err) - goto free_io_dummy; + goto free_percpu; err = f2fs_init_page_array_cache(sbi); if (err) goto free_xattr_cache; @@ -4619,13 +4648,12 @@ try_onemore: goto free_nm; } - err = adjust_reserved_segment(sbi); - if (err) - goto free_nm; - /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); + /* get segno of first zoned block device */ + sbi->first_zoned_segno = get_first_zoned_segno(sbi); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) @@ -4660,6 +4688,7 @@ try_onemore: goto free_node_inode; } + generic_set_sb_d_ops(sb); sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { err = -ENOMEM; @@ -4746,19 +4775,23 @@ try_onemore: reset_checkpoint: /* * If the f2fs is not readonly and fsync data recovery succeeds, - * check zoned block devices' write pointer consistency. + * write pointer consistency of cursegs and other zones are already + * checked and fixed during recovery. However, if recovery fails, + * write pointers are left untouched, and retry-mount should check + * them here. */ - if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_check_write_pointer(sbi); - if (err) - goto free_meta; - } - - f2fs_init_inmem_curseg(sbi); + if (skip_recovery) + err = f2fs_check_and_fix_write_pointer(sbi); + if (err) + goto free_meta; /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + err = f2fs_init_inmem_curseg(sbi); + if (err) + goto sync_free_meta; + if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) @@ -4853,8 +4886,6 @@ free_page_array_cache: f2fs_destroy_page_array_cache(sbi); free_xattr_cache: f2fs_destroy_xattr_caches(sbi); -free_io_dummy: - mempool_destroy(sbi->write_io_dummy); free_percpu: destroy_percpu_info(sbi); free_iostat: @@ -4877,8 +4908,6 @@ free_options: free_sb_buf: kfree(raw_super); free_sbi: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); sb->s_fs_info = NULL; @@ -4967,12 +4996,6 @@ static int __init init_f2fs_fs(void) { int err; - if (PAGE_SIZE != F2FS_BLKSIZE) { - printk("F2FS not supported on PAGE_SIZE(%lu) != BLOCK_SIZE(%lu)\n", - PAGE_SIZE, F2FS_BLKSIZE); - return -EINVAL; - } - err = init_inodecache(); if (err) goto fail; @@ -5000,9 +5023,6 @@ static int __init init_f2fs_fs(void) err = f2fs_init_shrinker(); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -5025,7 +5045,12 @@ static int __init init_f2fs_fs(void) err = f2fs_create_casefold_cache(); if (err) goto free_compress_cache; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_casefold_cache; return 0; +free_casefold_cache: + f2fs_destroy_casefold_cache(); free_compress_cache: f2fs_destroy_compress_cache(); free_compress_mempool: @@ -5040,8 +5065,6 @@ free_post_read: f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); @@ -5065,6 +5088,7 @@ fail: static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); @@ -5073,7 +5097,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); @@ -5091,5 +5114,3 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32"); - diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a7ec55c7bb20..d15c68b28952 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -170,6 +170,12 @@ static ssize_t undiscard_blks_show(struct f2fs_attr *a, SM_I(sbi)->dcc_info->undiscard_blks); } +static ssize_t atgc_enabled_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0); +} + static ssize_t gc_mode_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -182,50 +188,50 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (f2fs_sb_has_encrypt(sbi)) - len += scnprintf(buf, PAGE_SIZE - len, "%s", + len += sysfs_emit_at(buf, len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_readonly(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -323,30 +329,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += scnprintf(buf + len, PAGE_SIZE - len, - "cold file extension:\n"); + len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); - len += scnprintf(buf + len, PAGE_SIZE - len, - "hot file extension:\n"); + len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + return len; } if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) { struct ckpt_req_control *cprc = &sbi->cprc_info; int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio); - int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio); + int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio); if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE) return -EINVAL; return sysfs_emit(buf, "%s,%d\n", - class == IOPRIO_CLASS_RT ? "rt" : "be", data); + class == IOPRIO_CLASS_RT ? "rt" : "be", level); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -450,7 +453,7 @@ out: const char *name = strim((char *)buf); struct ckpt_req_control *cprc = &sbi->cprc_info; int class; - long data; + long level; int ret; if (!strncmp(name, "rt,", 3)) @@ -461,13 +464,13 @@ out: return -EINVAL; name += 3; - ret = kstrtol(name, 10, &data); + ret = kstrtol(name, 10, &level); if (ret) return ret; - if (data >= IOPRIO_NR_LEVELS || data < 0) + if (level >= IOPRIO_NR_LEVELS || level < 0) return -EINVAL; - cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data); + cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level); if (test_opt(sbi, MERGE_CHECKPOINT)) { ret = set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); @@ -484,17 +487,21 @@ out: if (ret < 0) return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX)) - return -EINVAL; - if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) - return -EINVAL; + if (a->struct_type == FAULT_INFO_TYPE) { + if (f2fs_build_fault_attr(sbi, 0, t)) + return -EINVAL; + return count; + } + if (a->struct_type == FAULT_INFO_RATE) { + if (f2fs_build_fault_attr(sbi, t, 0)) + return -EINVAL; + return count; + } #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks - - sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -551,7 +558,12 @@ out: } if (!strcmp(a->attr.name, "migration_granularity")) { - if (t == 0 || t > sbi->segs_per_sec) + if (t == 0 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + } + + if (!strcmp(a->attr.name, "migration_window_granularity")) { + if (t == 0 || t > SEGS_PER_SEC(sbi)) return -EINVAL; } @@ -621,6 +633,15 @@ out: } #endif +#ifdef CONFIG_BLK_DEV_ZONED + if (!strcmp(a->attr.name, "blkzone_alloc_policy")) { + if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV) + return -EINVAL; + sbi->blkzone_alloc_policy = t; + return count; + } +#endif + #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || !strcmp(a->attr.name, "compr_saved_block")) { @@ -675,6 +696,13 @@ out: return count; } + if (!strcmp(a->attr.name, "gc_pin_file_threshold")) { + if (t > MAX_GC_FAILED_PINNED_FILES) + return -EINVAL; + sbi->gc_pin_file_threshold = t; + return count; + } + if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { if (t != 0) return -EINVAL; @@ -759,10 +787,18 @@ out: return count; } + if (!strcmp(a->attr.name, "max_read_extent_count")) { + if (t > UINT_MAX) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; - if (t && f2fs_lfs_mode(sbi)) + /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */ + if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE))) return -EINVAL; SM_I(sbi)->ipu_policy = (unsigned int)t; return count; @@ -947,6 +983,9 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -956,6 +995,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); +SM_INFO_GENERAL_RW_ATTR(reserved_segments); /* DCC_INFO ATTR */ DCC_INFO_RW_ATTR(max_small_discards, max_discards); @@ -988,6 +1028,7 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity); F2FS_SBI_GENERAL_RW_ATTR(dir_level); #ifdef CONFIG_F2FS_IOSTAT F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); @@ -1018,8 +1059,11 @@ F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block); F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); +/* read extent cache */ +F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif /* STAT_INFO ATTR */ @@ -1059,6 +1103,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(atgc_enabled); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_GENERAL_RO_ATTR(moved_blocks_background); @@ -1103,6 +1148,9 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_no_zoned_gc_percent), + ATTR_LIST(gc_boost_zoned_gc_percent), + ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), @@ -1125,8 +1173,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), + ATTR_LIST(reserved_segments), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), + ATTR_LIST(migration_window_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), @@ -1174,6 +1224,7 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), @@ -1187,6 +1238,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_candidate_count), ATTR_LIST(atgc_age_weight), ATTR_LIST(atgc_age_threshold), + ATTR_LIST(atgc_enabled), ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), @@ -1199,6 +1251,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(hot_data_age_threshold), ATTR_LIST(warm_data_age_threshold), ATTR_LIST(last_age_weight), + ATTR_LIST(max_read_extent_count), NULL, }; ATTRIBUTE_GROUPS(f2fs); @@ -1268,6 +1321,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1284,6 +1338,7 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_casefold), ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), + ATTR_LIST(sb_device_alias), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); @@ -1417,7 +1472,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, le32_to_cpu(sbi->raw_super->segment_count_main); int i, j; - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps|mtime\n" "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); for (i = 0; i < total_segs; i++) { @@ -1427,6 +1482,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_printf(seq, "| %llx", se->mtime); seq_putc(seq, '\n'); } return 0; @@ -1492,6 +1548,50 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused disk_map_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + seq_printf(seq, "Address Layout : %5luB Block address (# of Segments)\n", + F2FS_BLKSIZE); + seq_printf(seq, " SB : %12s\n", "0/1024B"); + seq_printf(seq, " seg0_blkaddr : 0x%010x\n", SEG0_BLKADDR(sbi)); + seq_printf(seq, " Checkpoint : 0x%010x (%10d)\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2); + seq_printf(seq, " SIT : 0x%010x (%10d)\n", + SIT_I(sbi)->sit_base_addr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit)); + seq_printf(seq, " NAT : 0x%010x (%10d)\n", + NM_I(sbi)->nat_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat)); + seq_printf(seq, " SSA : 0x%010x (%10d)\n", + SM_I(sbi)->ssa_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa)); + seq_printf(seq, " Main : 0x%010x (%10d)\n", + SM_I(sbi)->main_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main)); + seq_printf(seq, " # of Sections : %12d\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count)); + seq_printf(seq, " Segs/Sections : %12d\n", + SEGS_PER_SEC(sbi)); + seq_printf(seq, " Section size : %12d MB\n", + SEGS_PER_SEC(sbi) << 1); + + if (!f2fs_is_multi_device(sbi)) + return 0; + + seq_puts(seq, "\nDisk Map for multi devices:\n"); + for (i = 0; i < sbi->s_ndevs; i++) + seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n", + i, bdev_is_zoned(FDEV(i).bdev), + FDEV(i).start_blk, FDEV(i).end_blk, + FDEV(i).path); + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -1573,6 +1673,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) victim_bits_seq_show, sb); proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, discard_plist_seq_show, sb); + proc_create_single_data("disk_map", 0444, sbi->s_proc, + disk_map_seq_show, sb); return 0; put_feature_list_kobj: kobject_put(&sbi->s_feature_list_kobj); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 4fc95f353a7a..2287f238ae09 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -74,23 +74,23 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; - if (pos + count > inode->i_sb->s_maxbytes) + if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode))) return -EFBIG; while (count) { size_t n = min_t(size_t, count, PAGE_SIZE - offset_in_page(pos)); - struct page *page; + struct folio *folio; void *fsdata = NULL; int res; - res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata); + res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata); if (res) return res; - memcpy_to_page(page, offset_in_page(pos), buf, n); + memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n); - res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata); + res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata); if (res < 0) return res; if (res != n) @@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, pos = le64_to_cpu(dloc.pos); /* Get the descriptor */ - if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + if (pos + size < pos || + pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), @@ -258,21 +259,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct page *page; + struct folio *folio; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); - if (!page || !PageUptodate(page)) { + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - if (page) - put_page(page); + if (!IS_ERR(folio)) + folio_put(folio); else if (num_ra_pages > 1) page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - page = read_mapping_page(inode->i_mapping, index, NULL); + folio = read_mapping_folio(inode->i_mapping, index, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - return page; + return folio_file_page(folio, index); } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f290fe9327c4..3f3874943679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -629,6 +629,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -772,9 +773,18 @@ retry: if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; |