summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-11-04 09:26:23 -1000
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-11-04 09:26:23 -1000
commit: aea6bf908d730b01bd264a8821159db9463c111c (patch)
tree: fa345142ac6a976879f4d9610b75c4401eb03914
parent: c9b93cafb69cbbbe375de29c1ebf410dbc33ebfc (diff)
parent: 1e7bef5f90ed69d903768d78369f251b77e5d2f5 (diff)
Merge tag 'f2fs-for-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "In this cycle, we introduce a bigger page size support by changing the
  internal f2fs's block size aligned to the page size. We also continue
  to improve zoned block device support regarding the power off
  recovery. As usual, there are some bug fixes regarding the error
  handling routines in compression and ioctl.

  Enhancements:
   - Support Block Size == Page Size
   - let f2fs_precache_extents() traverses in file range
   - stop iterating f2fs_map_block if hole exists
   - preload extent_cache for POSIX_FADV_WILLNEED
   - compress: fix to avoid fragment w/ OPU during
     f2fs_ioc_compress_file()

  Bug fixes:
   - do not return EFSCORRUPTED, but try to run online repair
   - finish previous checkpoints before returning from remount
   - fix error handling of __get_node_page and __f2fs_build_free_nids
   - clean up zones when not successfully unmounted
   - fix to initialize map.m_pblk in f2fs_precache_extents()
   - fix to drop meta_inode's page cache in f2fs_put_super()
   - set the default compress_level on ioctl
   - fix to avoid use-after-free on dic
   - fix to avoid redundant compress extension
   - do sanity check on cluster when CONFIG_F2FS_CHECK_FS is on
   - fix deadloop in f2fs_write_cache_pages()"

* tag 'f2fs-for-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs:
  f2fs: finish previous checkpoints before returning from remount
  f2fs: fix error handling of __get_node_page
  f2fs: do not return EFSCORRUPTED, but try to run online repair
  f2fs: fix error path of __f2fs_build_free_nids
  f2fs: Clean up errors in segment.h
  f2fs: clean up zones when not successfully unmounted
  f2fs: let f2fs_precache_extents() traverses in file range
  f2fs: avoid format-overflow warning
  f2fs: fix to initialize map.m_pblk in f2fs_precache_extents()
  f2fs: Support Block Size == Page Size
  f2fs: stop iterating f2fs_map_block if hole exists
  f2fs: preload extent_cache for POSIX_FADV_WILLNEED
  f2fs: set the default compress_level on ioctl
  f2fs: compress: fix to avoid fragment w/ OPU during f2fs_ioc_compress_file()
  f2fs: fix to drop meta_inode's page cache in f2fs_put_super()
  f2fs: split initial and dynamic conditions for extent_cache
  f2fs: compress: fix to avoid redundant compress extension
  f2fs: compress: do sanity check on cluster when CONFIG_F2FS_CHECK_FS is on
  f2fs: compress: fix to avoid use-after-free on dic
  f2fs: compress: fix deadloop in f2fs_write_cache_pages()
-rw-r--r--  fs/f2fs/compress.c       63
-rw-r--r--  fs/f2fs/data.c           35
-rw-r--r--  fs/f2fs/extent_cache.c   53
-rw-r--r--  fs/f2fs/file.c           17
-rw-r--r--  fs/f2fs/inode.c           2
-rw-r--r--  fs/f2fs/node.c           20
-rw-r--r--  fs/f2fs/segment.c        92
-rw-r--r--  fs/f2fs/segment.h         4
-rw-r--r--  fs/f2fs/super.c          98
-rw-r--r--  fs/f2fs/xattr.c          20
-rw-r--r--  include/linux/f2fs_fs.h  70
11 files changed, 296 insertions, 178 deletions
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d820801f473e..36e5dab6baae 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -893,14 +893,15 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc)
bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
{
+#ifdef CONFIG_F2FS_CHECK_FS
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
- bool compressed = dn->data_blkaddr == COMPRESS_ADDR;
int cluster_end = 0;
+ unsigned int count;
int i;
char *reason = "";
- if (!compressed)
+ if (dn->data_blkaddr != COMPRESS_ADDR)
return false;
/* [..., COMPR_ADDR, ...] */
@@ -909,7 +910,7 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
goto out;
}
- for (i = 1; i < cluster_size; i++) {
+ for (i = 1, count = 1; i < cluster_size; i++, count++) {
block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
dn->ofs_in_node + i);
@@ -929,19 +930,42 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn)
goto out;
}
}
+
+ f2fs_bug_on(F2FS_I_SB(dn->inode), count != cluster_size &&
+ !is_inode_flag_set(dn->inode, FI_COMPRESS_RELEASED));
+
return false;
out:
f2fs_warn(sbi, "access invalid cluster, ino:%lu, nid:%u, ofs_in_node:%u, reason:%s",
dn->inode->i_ino, dn->nid, dn->ofs_in_node, reason);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return true;
+#else
+ return false;
+#endif
+}
+
+static int __f2fs_get_cluster_blocks(struct inode *inode,
+ struct dnode_of_data *dn)
+{
+ unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+ int count, i;
+
+ for (i = 1, count = 1; i < cluster_size; i++) {
+ block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
+
+ if (__is_valid_data_blkaddr(blkaddr))
+ count++;
+ }
+
+ return count;
}
static int __f2fs_cluster_blocks(struct inode *inode,
- unsigned int cluster_idx, bool compr)
+ unsigned int cluster_idx, bool compr_blks)
{
struct dnode_of_data dn;
- unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
unsigned int start_idx = cluster_idx <<
F2FS_I(inode)->i_log_cluster_size;
int ret;
@@ -956,31 +980,14 @@ static int __f2fs_cluster_blocks(struct inode *inode,
if (f2fs_sanity_check_cluster(&dn)) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_CLUSTER);
goto fail;
}
if (dn.data_blkaddr == COMPRESS_ADDR) {
- int i;
-
- ret = 1;
- for (i = 1; i < cluster_size; i++) {
- block_t blkaddr;
-
- blkaddr = data_blkaddr(dn.inode,
- dn.node_page, dn.ofs_in_node + i);
- if (compr) {
- if (__is_valid_data_blkaddr(blkaddr))
- ret++;
- } else {
- if (blkaddr != NULL_ADDR)
- ret++;
- }
- }
-
- f2fs_bug_on(F2FS_I_SB(inode),
- !compr && ret != cluster_size &&
- !is_inode_flag_set(inode, FI_COMPRESS_RELEASED));
+ if (compr_blks)
+ ret = __f2fs_get_cluster_blocks(inode, &dn);
+ else
+ ret = 1;
}
fail:
f2fs_put_dnode(&dn);
@@ -993,7 +1000,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc)
return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true);
}
-/* return # of valid blocks in compressed cluster */
+/* return whether cluster is compressed one or not */
int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
{
return __f2fs_cluster_blocks(inode,
@@ -1976,7 +1983,7 @@ void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi)
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
- char slab_name[32];
+ char slab_name[35];
if (!f2fs_sb_has_compression(sbi))
return 0;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 916e317ac925..4e42b5f24deb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1690,9 +1690,7 @@ next_block:
map->m_flags |= F2FS_MAP_NEW;
} else if (is_hole) {
if (f2fs_compressed_file(inode) &&
- f2fs_sanity_check_cluster(&dn) &&
- (flag != F2FS_GET_BLOCK_FIEMAP ||
- IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
+ f2fs_sanity_check_cluster(&dn)) {
err = -EFSCORRUPTED;
f2fs_handle_error(sbi,
ERROR_CORRUPTED_CLUSTER);
@@ -2344,8 +2342,10 @@ skip_reading_dnode:
f2fs_wait_on_block_writeback(inode, blkaddr);
if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
- if (atomic_dec_and_test(&dic->remaining_pages))
+ if (atomic_dec_and_test(&dic->remaining_pages)) {
f2fs_decompress_cluster(dic, true);
+ break;
+ }
continue;
}
@@ -2665,6 +2665,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (f2fs_is_atomic_file(inode))
return true;
+ /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
+ if (f2fs_compressed_file(inode) &&
+ F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
+ is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
+ return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -3023,7 +3028,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
{
int ret = 0;
int done = 0, retry = 0;
- struct page *pages[F2FS_ONSTACK_PAGES];
+ struct page *pages_local[F2FS_ONSTACK_PAGES];
+ struct page **pages = pages_local;
struct folio_batch fbatch;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
struct bio *bio = NULL;
@@ -3047,6 +3053,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
#endif
int nr_folios, p, idx;
int nr_pages;
+ unsigned int max_pages = F2FS_ONSTACK_PAGES;
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
@@ -3056,6 +3063,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int submitted = 0;
int i;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode) &&
+ 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
+ pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
+ cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
+ max_pages = 1 << cc.log_cluster_size;
+ }
+#endif
+
folio_batch_init(&fbatch);
if (get_dirty_pages(mapping->host) <=
@@ -3101,7 +3117,7 @@ again:
add_more:
pages[nr_pages] = folio_page(folio, idx);
folio_get(folio);
- if (++nr_pages == F2FS_ONSTACK_PAGES) {
+ if (++nr_pages == max_pages) {
index = folio->index + idx + 1;
folio_batch_release(&fbatch);
goto write;
@@ -3283,6 +3299,11 @@ next:
if (bio)
f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (pages != pages_local)
+ kfree(pages);
+#endif
+
return ret;
}
@@ -4055,7 +4076,7 @@ next:
sis->highest_bit = cur_lblock - 1;
out:
if (not_aligned)
- f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
+ f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
not_aligned, blks_per_sec * F2FS_BLKSIZE);
return ret;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 0e2d49140c07..ad8dfac73bd4 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -74,40 +74,14 @@ static void __set_extent_info(struct extent_info *ei,
}
}
-static bool __may_read_extent_tree(struct inode *inode)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-
- if (!test_opt(sbi, READ_EXTENT_CACHE))
- return false;
- if (is_inode_flag_set(inode, FI_NO_EXTENT))
- return false;
- if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
- !f2fs_sb_has_readonly(sbi))
- return false;
- return S_ISREG(inode->i_mode);
-}
-
-static bool __may_age_extent_tree(struct inode *inode)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-
- if (!test_opt(sbi, AGE_EXTENT_CACHE))
- return false;
- if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
- return false;
- if (file_is_cold(inode))
- return false;
-
- return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
-}
-
static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
if (type == EX_READ)
- return __may_read_extent_tree(inode);
- else if (type == EX_BLOCK_AGE)
- return __may_age_extent_tree(inode);
+ return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
+ S_ISREG(inode->i_mode);
+ if (type == EX_BLOCK_AGE)
+ return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
return false;
}
@@ -120,7 +94,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
if (list_empty(&F2FS_I_SB(inode)->s_list))
return false;
- return __init_may_extent_tree(inode, type);
+ if (!__init_may_extent_tree(inode, type))
+ return false;
+
+ if (type == EX_READ) {
+ if (is_inode_flag_set(inode, FI_NO_EXTENT))
+ return false;
+ if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+ !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
+ return false;
+ } else if (type == EX_BLOCK_AGE) {
+ if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
+ return false;
+ if (file_is_cold(inode))
+ return false;
+ }
+ return true;
}
static void __try_update_largest_extent(struct extent_tree *et,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index dd99abbb7186..e50363583f01 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3258,11 +3258,12 @@ int f2fs_precache_extents(struct inode *inode)
return -EOPNOTSUPP;
map.m_lblk = 0;
+ map.m_pblk = 0;
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
- end = max_file_blocks(inode);
+ end = F2FS_BLK_ALIGN(i_size_read(inode));
while (map.m_lblk < end) {
map.m_len = end - map.m_lblk;
@@ -3270,7 +3271,7 @@ int f2fs_precache_extents(struct inode *inode)
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
- if (err)
+ if (err || !map.m_len)
return err;
map.m_lblk = m_next_extent;
@@ -4005,6 +4006,15 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
F2FS_I(inode)->i_compress_algorithm = option.algorithm;
F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
+ /* Set default level */
+ if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD)
+ F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ else
+ F2FS_I(inode)->i_compress_level = 0;
+ /* Adjust mount option level */
+ if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
+ F2FS_OPTION(sbi).compress_level)
+ F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level;
f2fs_mark_inode_dirty_sync(inode, true);
if (!f2fs_is_compress_backend_ready(inode))
@@ -4849,6 +4859,9 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
filp->f_mode &= ~FMODE_RANDOM;
spin_unlock(&filp->f_lock);
return 0;
+ } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
+ /* Load extent cache at the first readahead. */
+ f2fs_precache_extents(inode);
}
err = generic_fadvise(filp, offset, len, advice);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 5779c7edd49b..560bfcad1af2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -315,7 +315,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
f2fs_has_inline_xattr(inode) &&
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MAX_INLINE_XATTR_SIZE);
return false;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index ee2e1dd64f25..6c7f6a649d27 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -633,7 +633,7 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n)
/* Then, try readahead for siblings of the desired node */
end = start + n;
- end = min(end, NIDS_PER_BLOCK);
+ end = min(end, (int)NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
nid = get_nid(parent, i, false);
f2fs_ra_node_page(sbi, nid);
@@ -1467,7 +1467,8 @@ page_hit:
ofs_of_node(page), cpver_of_node(page),
next_blkaddr_of_node(page));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
+ err = -EFSCORRUPTED;
out_err:
ClearPageUptodate(page);
out_put_err:
@@ -2389,7 +2390,7 @@ static int scan_nat_page(struct f2fs_sb_info *sbi,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
if (blk_addr == NEW_ADDR)
- return -EINVAL;
+ return -EFSCORRUPTED;
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
@@ -2504,7 +2505,14 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
if (ret) {
f2fs_up_read(&nm_i->nat_tree_lock);
- f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
+
+ if (ret == -EFSCORRUPTED) {
+ f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi,
+ ERROR_INCONSISTENT_NAT);
+ }
+
return ret;
}
}
@@ -2743,7 +2751,9 @@ recover_xnid:
f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
- memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
+ if (page)
+ memcpy(F2FS_NODE(xpage), F2FS_NODE(page),
+ VALID_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d05b41608fc0..727d016318f9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4910,22 +4910,31 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
}
/*
- * The write pointer matches with the valid blocks or
- * already points to the end of the zone.
+ * When safely unmounted in the previous mount, we can trust write
+ * pointers. Otherwise, finish zones.
*/
- if ((last_valid_block + 1 == wp_block) ||
- (zone->wp == zone->start + zone->len))
- return 0;
-
- if (last_valid_block + 1 == zone_block) {
+ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
/*
- * If there is no valid block in the zone and if write pointer
- * is not at zone start, reset the write pointer.
+ * The write pointer matches with the valid blocks or
+ * already points to the end of the zone.
*/
- f2fs_notice(sbi,
- "Zone without valid block has non-zero write "
- "pointer. Reset the write pointer: wp[0x%x,0x%x]",
- wp_segno, wp_blkoff);
+ if ((last_valid_block + 1 == wp_block) ||
+ (zone->wp == zone->start + zone->len))
+ return 0;
+ }
+
+ if (last_valid_block + 1 == zone_block) {
+ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ /*
+ * If there is no valid block in the zone and if write
+ * pointer is not at zone start, reset the write
+ * pointer.
+ */
+ f2fs_notice(sbi,
+ "Zone without valid block has non-zero write "
+ "pointer. Reset the write pointer: wp[0x%x,0x%x]",
+ wp_segno, wp_blkoff);
+ }
ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
zone->len >> log_sectors_per_block);
if (ret)
@@ -4935,18 +4944,20 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
return ret;
}
- /*
- * If there are valid blocks and the write pointer doesn't
- * match with them, we need to report the inconsistency and
- * fill the zone till the end to close the zone. This inconsistency
- * does not cause write error because the zone will not be selected
- * for write operation until it get discarded.
- */
- f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
- "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
- GET_SEGNO(sbi, last_valid_block),
- GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
- wp_segno, wp_blkoff);
+ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ /*
+ * If there are valid blocks and the write pointer doesn't match
+ * with them, we need to report the inconsistency and fill
+ * the zone till the end to close the zone. This inconsistency
+ * does not cause write error because the zone will not be
+ * selected for write operation until it get discarded.
+ */
+ f2fs_notice(sbi, "Valid blocks are not aligned with write "
+ "pointer: valid block[0x%x,0x%x] wp[0x%x,0x%x]",
+ GET_SEGNO(sbi, last_valid_block),
+ GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
+ wp_segno, wp_blkoff);
+ }
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
zone->start, zone->len, GFP_NOFS);
@@ -5020,18 +5031,27 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
- wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
- wp_segno = GET_SEGNO(sbi, wp_block);
- wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
- wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
-
- if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
- wp_sector_off == 0)
- return 0;
+ /*
+ * When safely unmounted in the previous mount, we could use current
+ * segments. Otherwise, allocate new sections.
+ */
+ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+ wp_segno = GET_SEGNO(sbi, wp_block);
+ wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+ wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+ if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+ wp_sector_off == 0)
+ return 0;
- f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
- "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
- type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+ f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+ "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
+ cs->next_blkoff, wp_segno, wp_blkoff);
+ } else {
+ f2fs_notice(sbi, "Not successfully unmounted in the previous "
+ "mount");
+ }
f2fs_notice(sbi, "Assign new section to curseg[%d]: "
"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2ca8fb5d0dc4..8129be788bd5 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -108,11 +108,11 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
(sbi)->log_blocks_per_seg))
#define GET_SEC_FROM_SEG(sbi, segno) \
- (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
+ (((segno) == -1) ? -1 : (segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
((secno) * (sbi)->segs_per_sec)
#define GET_ZONE_FROM_SEC(sbi, secno) \
- (((secno) == -1) ? -1: (secno) / (sbi)->secs_per_zone)
+ (((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 05f9f7b6ebf8..1ed68158bac5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -562,6 +562,29 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
+static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
+ const char *new_ext, bool is_ext)
+{
+ unsigned char (*ext)[F2FS_EXTENSION_LEN];
+ int ext_cnt;
+ int i;
+
+ if (is_ext) {
+ ext = F2FS_OPTION(sbi).extensions;
+ ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ } else {
+ ext = F2FS_OPTION(sbi).noextensions;
+ ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ }
+
+ for (i = 0; i < ext_cnt; i++) {
+ if (!strcasecmp(new_ext, ext[i]))
+ return true;
+ }
+
+ return false;
+}
+
/*
* 1. The same extension name cannot not appear in both compress and non-compress extension
* at the same time.
@@ -1164,6 +1187,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
+ if (is_compress_extension_exist(sbi, name, true)) {
+ kfree(name);
+ break;
+ }
+
strcpy(ext[ext_cnt], name);
F2FS_OPTION(sbi).compress_ext_cnt++;
kfree(name);
@@ -1188,6 +1216,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
+ if (is_compress_extension_exist(sbi, name, false)) {
+ kfree(name);
+ break;
+ }
+
strcpy(noext[noext_cnt], name);
F2FS_OPTION(sbi).nocompress_ext_cnt++;
kfree(name);
@@ -1644,7 +1677,7 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
- if (err) {
+ if (err || f2fs_cp_error(sbi)) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
}
@@ -2301,9 +2334,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_sb_flags;
int err;
bool need_restart_gc = false, need_stop_gc = false;
- bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
bool need_restart_discard = false, need_stop_discard = false;
+ bool need_enable_checkpoint = false, need_disable_checkpoint = false;
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
@@ -2467,24 +2500,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
- if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
- !test_opt(sbi, MERGE_CHECKPOINT)) {
- f2fs_stop_ckpt_thread(sbi);
- need_restart_ckpt = true;
- } else {
- /* Flush if the prevous checkpoint, if exists. */
- f2fs_flush_ckpt_thread(sbi);
-
- err = f2fs_start_ckpt_thread(sbi);
- if (err) {
- f2fs_err(sbi,
- "Failed to start F2FS issue_checkpoint_thread (%d)",
- err);
- goto restore_gc;
- }
- need_stop_ckpt = true;
- }
-
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
@@ -2496,7 +2511,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
} else {
err = f2fs_create_flush_cmd_control(sbi);
if (err)
- goto restore_ckpt;
+ goto restore_gc;
need_stop_flush = true;
}
@@ -2518,8 +2533,31 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
err = f2fs_disable_checkpoint(sbi);
if (err)
goto restore_discard;
+ need_enable_checkpoint = true;
} else {
f2fs_enable_checkpoint(sbi);
+ need_disable_checkpoint = true;
+ }
+ }
+
+ /*
+ * Place this routine at the end, since a new checkpoint would be
+ * triggered while remount and we need to take care of it before
+ * returning from remount.
+ */
+ if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ !test_opt(sbi, MERGE_CHECKPOINT)) {
+ f2fs_stop_ckpt_thread(sbi);
+ } else {
+ /* Flush if the prevous checkpoint, if exists. */
+ f2fs_flush_ckpt_thread(sbi);
+
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+ "Failed to start F2FS issue_checkpoint_thread (%d)",
+ err);
+ goto restore_checkpoint;
}
}
@@ -2537,6 +2575,13 @@ skip:
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+restore_checkpoint:
+ if (need_enable_checkpoint) {
+ f2fs_enable_checkpoint(sbi);
+ } else if (need_disable_checkpoint) {
+ if (f2fs_disable_checkpoint(sbi))
+ f2fs_warn(sbi, "checkpoint has not been disabled");
+ }
restore_discard:
if (need_restart_discard) {
if (f2fs_start_discard_thread(sbi))
@@ -2552,13 +2597,6 @@ restore_flush:
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
}
-restore_ckpt:
- if (need_restart_ckpt) {
- if (f2fs_start_ckpt_thread(sbi))
- f2fs_warn(sbi, "background ckpt thread has stopped");
- } else if (need_stop_ckpt) {
- f2fs_stop_ckpt_thread(sbi);
- }
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
@@ -3479,7 +3517,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- /* Currently, support 512/1024/2048/4096 bytes sector size */
+ /* Currently, support 512/1024/2048/4096/16K bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
@@ -4926,7 +4964,7 @@ static int __init init_f2fs_fs(void)
int err;
if (PAGE_SIZE != F2FS_BLKSIZE) {
- printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
+ printk("F2FS not supported on PAGE_SIZE(%lu) != BLOCK_SIZE(%lu)\n",
PAGE_SIZE, F2FS_BLKSIZE);
return -EINVAL;
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 4314456854f6..47e88b4d4e7d 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -364,10 +364,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
if (!*xe) {
- f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
- err = -EFSCORRUPTED;
+ err = -ENODATA;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto out;
@@ -584,13 +584,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
- f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
- error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
- goto cleanup;
+ break;
}
if (!prefix)
@@ -650,7 +649,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
-
+retry:
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
@@ -660,7 +659,14 @@ static int __f2fs_setxattr(struct inode *inode, int index,
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
if (!here) {
- f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ if (!F2FS_I(inode)->i_xattr_nid) {
+ f2fs_notice(F2FS_I_SB(inode),
+ "recover xattr in inode (%lu)", inode->i_ino);
+ f2fs_recover_xattr_data(inode, NULL);
+ kfree(base_addr);
+ goto retry;
+ }
+ f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index a82a4bb6ce68..039fe0ce8d83 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -13,10 +13,10 @@
#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */
#define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */
-#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */
-#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */
-#define F2FS_BLKSIZE 4096 /* support only 4KB block */
-#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */
+#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */
+#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */
+#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */
+#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
#define F2FS_EXTENSION_LEN 8 /* max size of extension */
#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
@@ -104,6 +104,7 @@ enum f2fs_error {
ERROR_CORRUPTED_VERITY_XATTR,
ERROR_CORRUPTED_XATTR,
ERROR_INVALID_NODE_REFERENCE,
+ ERROR_INCONSISTENT_NAT,
ERROR_MAX,
};
@@ -210,14 +211,14 @@ struct f2fs_checkpoint {
unsigned char sit_nat_version_bitmap[];
} __packed;
-#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */
+#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */
#define CP_MIN_CHKSUM_OFFSET \
(offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap))
/*
* For orphan inode management
*/
-#define F2FS_ORPHANS_PER_BLOCK 1020
+#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32))
#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \
F2FS_ORPHANS_PER_BLOCK)
@@ -243,14 +244,31 @@ struct f2fs_extent {
#define F2FS_NAME_LEN 255
/* 200 bytes for inline xattrs by default */
#define DEFAULT_INLINE_XATTR_ADDRS 50
-#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
+
+#define OFFSET_OF_END_OF_I_EXT 360
+#define SIZE_OF_I_NID 20
+
+struct node_footer {
+ __le32 nid; /* node id */
+ __le32 ino; /* inode number */
+ __le32 flag; /* include cold/fsync/dentry marks and offset */
+ __le64 cp_ver; /* checkpoint version */
+ __le32 next_blkaddr; /* next node page block address */
+} __packed;
+
+/* Address Pointers in an Inode */
+#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \
+ - SIZE_OF_I_NID \
+ - sizeof(struct node_footer)) / sizeof(__le32))
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
-#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
+/* Address Pointers in a Direct Block */
+#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
#define ADDRS_PER_BLOCK(inode) addrs_per_block(inode)
-#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
+/* Node IDs in an Indirect Block */
+#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
#define ADDRS_PER_PAGE(page, inode) \
(IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode))
@@ -342,14 +360,6 @@ enum {
#define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0)
-struct node_footer {
- __le32 nid; /* node id */
- __le32 ino; /* inode number */
- __le32 flag; /* include cold/fsync/dentry marks and offset */
- __le64 cp_ver; /* checkpoint version */
- __le32 next_blkaddr; /* next node page block address */
-} __packed;
-
struct f2fs_node {
/* can be one of three types: inode, direct, and indirect types */
union {
@@ -363,7 +373,7 @@ struct f2fs_node {
/*
* For NAT entries
*/
-#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry))
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
@@ -378,12 +388,13 @@ struct f2fs_nat_block {
/*
* For SIT entries
*
- * Each segment is 2MB in size by default so that a bitmap for validity of
- * there-in blocks should occupy 64 bytes, 512 bits.
+ * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size,
+ * this results in a segment size of 2MB. For 16k pages, the default segment size
+ * is 8MB.
* Not allow to change this.
*/
#define SIT_VBLOCK_MAP_SIZE 64
-#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
+#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry))
/*
* F2FS uses 4 bytes to represent block address. As a result, supported size of
@@ -418,7 +429,7 @@ struct f2fs_sit_block {
* For segment summary
*
* One summary block contains exactly 512 summary entries, which represents
- * exactly 2MB segment by default. Not allow to change the basic units.
+ * exactly one segment by default. Not allow to change the basic units.
*
* NOTE: For initializing fields, you must use set_summary
*
@@ -429,12 +440,12 @@ struct f2fs_sit_block {
* from node's page's beginning to get a data block address.
* ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
*/
-#define ENTRIES_IN_SUM 512
+#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8)
#define SUMMARY_SIZE (7) /* sizeof(struct summary) */
#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */
#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM)
-/* a summary entry for a 4KB-sized block in a segment */
+/* a summary entry for a block in a segment */
struct f2fs_summary {
__le32 nid; /* parent node id */
union {
@@ -518,7 +529,7 @@ struct f2fs_journal {
};
} __packed;
-/* 4KB-sized summary block structure */
+/* Block-sized summary block structure */
struct f2fs_summary_block {
struct f2fs_summary entries[ENTRIES_IN_SUM];
struct f2fs_journal journal;
@@ -559,11 +570,14 @@ typedef __le32 f2fs_hash_t;
* Note: there are more reserved space in inline dentry than in regular
* dentry, when converting inline dentry we should handle this carefully.
*/
-#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1))
#define SIZE_OF_DIR_ENTRY 11 /* by byte */
#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
BITS_PER_BYTE)
-#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \
F2FS_SLOT_LEN) * \
NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
#define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */
@@ -576,7 +590,7 @@ struct f2fs_dir_entry {
__u8 file_type; /* file type */
} __packed;
-/* 4KB-sized directory entry block */
+/* Block-sized directory entry block */
struct f2fs_dentry_block {
/* validity bitmap for directory entries in each block */
__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];