summaryrefslogtreecommitdiff
path: root/fs/f2fs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/super.c')
-rw-r--r--fs/f2fs/super.c523
1 files changed, 272 insertions, 251 deletions
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index d45ab0992ae5..19b67828ae32 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -11,7 +11,6 @@
#include <linux/fs_context.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
-#include <linux/buffer_head.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
@@ -44,41 +43,53 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
+ [FAULT_NO_SEGMENT] = "no free segment",
};
-void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
- unsigned int type)
+int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate,
+ unsigned long type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
+ if (rate > INT_MAX)
+ return -EINVAL;
atomic_set(&ffi->inject_ops, 0);
- ffi->inject_rate = rate;
+ ffi->inject_rate = (int)rate;
}
- if (type)
- ffi->inject_type = type;
+ if (type) {
+ if (type >= BIT(FAULT_MAX))
+ return -EINVAL;
+ ffi->inject_type = (unsigned int)type;
+ }
if (!rate && !type)
memset(ffi, 0, sizeof(struct f2fs_fault_info));
+ else
+ f2fs_info(sbi,
+ "build fault injection attr: rate: %lu, type: 0x%lx",
+ rate, type);
+ return 0;
}
#endif
@@ -137,7 +148,6 @@ enum {
Opt_resgid,
Opt_resuid,
Opt_mode,
- Opt_io_size_bits,
Opt_fault_injection,
Opt_fault_type,
Opt_lazytime,
@@ -216,7 +226,6 @@ static match_table_t f2fs_tokens = {
{Opt_resgid, "resgid=%u"},
{Opt_resuid, "resuid=%u"},
{Opt_mode, "mode=%s"},
- {Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
@@ -263,7 +272,8 @@ static match_table_t f2fs_tokens = {
{Opt_err, NULL},
};
-void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -274,8 +284,12 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
level = printk_get_level(fmt);
vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (limit_rate)
+ printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
va_end(args);
}
@@ -306,7 +320,7 @@ struct kmem_cache *f2fs_cf_name_slab;
static int __init f2fs_create_casefold_cache(void)
{
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
- F2FS_NAME_LEN);
+ F2FS_NAME_LEN);
return f2fs_cf_name_slab ? 0 : -ENOMEM;
}
@@ -343,46 +357,6 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
-static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
-{
- unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
- unsigned int avg_vblocks;
- unsigned int wanted_reserved_segments;
- block_t avail_user_block_count;
-
- if (!F2FS_IO_ALIGNED(sbi))
- return 0;
-
- /* average valid block count in section in worst case */
- avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
-
- /*
- * we need enough free space when migrating one section in worst case
- */
- wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
- reserved_segments(sbi);
- wanted_reserved_segments -= reserved_segments(sbi);
-
- avail_user_block_count = sbi->user_block_count -
- sbi->current_reserved_blocks -
- F2FS_OPTION(sbi).root_reserved_blocks;
-
- if (wanted_reserved_segments * sbi->blocks_per_seg >
- avail_user_block_count) {
- f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
- wanted_reserved_segments,
- avail_user_block_count >> sbi->log_blocks_per_seg);
- return -ENOSPC;
- }
-
- SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
-
- f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
- wanted_reserved_segments);
-
- return 0;
-}
-
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -663,7 +637,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
#ifdef CONFIG_F2FS_FS_ZSTD
static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
{
- unsigned int level;
+ int level;
int len = 4;
if (strlen(str) == len) {
@@ -677,9 +651,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
- if (kstrtouint(str + 1, 10, &level))
+ if (kstrtoint(str + 1, 10, &level))
return -EINVAL;
+ /* f2fs does not support negative compress level now */
+ if (level < 0) {
+ f2fs_info(sbi, "do not support negative compress level: %d", level);
+ return -ERANGE;
+ }
+
if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
f2fs_info(sbi, "invalid zstd compress level: %d", level);
return -EINVAL;
@@ -730,6 +710,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!strcmp(name, "on")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
} else if (!strcmp(name, "off")) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "zoned devices need bggc");
+ kfree(name);
+ return -EINVAL;
+ }
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
} else if (!strcmp(name, "sync")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
@@ -763,10 +748,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
clear_opt(sbi, DISCARD);
break;
case Opt_noheap:
- set_opt(sbi, NOHEAP);
- break;
case Opt_heap:
- clear_opt(sbi, NOHEAP);
+ f2fs_warn(sbi, "heap/no_heap options were deprecated");
break;
#ifdef CONFIG_F2FS_FS_XATTR
case Opt_user_xattr:
@@ -855,6 +838,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
set_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noextent_cache:
+ if (F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_DEVICE_ALIAS)) {
+ f2fs_err(sbi, "device aliasing requires extent cache");
+ return -EINVAL;
+ }
clear_opt(sbi, READ_EXTENT_CACHE);
break;
case Opt_noinline_data:
@@ -913,28 +900,21 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
}
kfree(name);
break;
- case Opt_io_size_bits:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
- f2fs_warn(sbi, "Not support %ld, larger than %d",
- BIT(arg), BIO_MAX_VECS);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).write_io_size_bits = arg;
- break;
#ifdef CONFIG_F2FS_FAULT_INJECTION
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
- f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
+ if (f2fs_build_fault_attr(sbi, arg,
+ F2FS_ALL_FAULT_TYPE))
+ return -EINVAL;
set_opt(sbi, FAULT_INJECTION);
break;
case Opt_fault_type:
if (args->from && match_int(args, &arg))
return -EINVAL;
- f2fs_build_fault_attr(sbi, 0, arg);
+ if (f2fs_build_fault_attr(sbi, 0, arg))
+ return -EINVAL;
set_opt(sbi, FAULT_INJECTION);
break;
#else
@@ -1192,7 +1172,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
}
- strcpy(ext[ext_cnt], name);
+ ret = strscpy(ext[ext_cnt], name);
+ if (ret < 0) {
+ kfree(name);
+ return ret;
+ }
F2FS_OPTION(sbi).compress_ext_cnt++;
kfree(name);
break;
@@ -1221,7 +1205,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
}
- strcpy(noext[noext_cnt], name);
+ ret = strscpy(noext[noext_cnt], name);
+ if (ret < 0) {
+ kfree(name);
+ return ret;
+ }
F2FS_OPTION(sbi).nocompress_ext_cnt++;
kfree(name);
break;
@@ -1354,13 +1342,13 @@ default_check:
return -EINVAL;
}
#endif
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (f2fs_sb_has_casefold(sbi)) {
+
+ if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
-#endif
+
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
@@ -1392,12 +1380,6 @@ default_check:
}
#endif
- if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
- f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
- return -EINVAL;
- }
-
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
int min_size, max_size;
@@ -1605,7 +1587,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- bdev_release(FDEV(i).bdev_handle);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
@@ -1712,13 +1694,10 @@ static void f2fs_put_super(struct super_block *sb)
kvfree(sbi->ckpt);
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
- mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
kfree(F2FS_OPTION(sbi).s_qf_names[i]);
@@ -1779,6 +1758,18 @@ static int f2fs_freeze(struct super_block *sb)
static int f2fs_unfreeze(struct super_block *sb)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ /*
+ * It will update discard_max_bytes of mounted lvm device to zero
+ * after creating snapshot on this lvm device, let's drop all
+ * remained discards.
+ * We don't need to disable real-time discard because discard_max_bytes
+ * will recover after removal of snapshot.
+ */
+ if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi))
+ f2fs_issue_discard_timeout(sbi);
+
clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}
@@ -2009,10 +2000,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
} else {
seq_puts(seq, ",nodiscard");
}
- if (test_opt(sbi, NOHEAP))
- seq_puts(seq, ",no_heap");
- else
- seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
if (test_opt(sbi, XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -2078,9 +2065,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resgid));
- if (F2FS_IO_SIZE_BITS(sbi))
- seq_printf(seq, ",io_bits=%u",
- F2FS_OPTION(sbi).write_io_size_bits);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
@@ -2187,12 +2171,9 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE;
- sbi->sb->s_flags &= ~SB_INLINECRYPT;
-
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
- set_opt(sbi, NOHEAP);
set_opt(sbi, MERGE_CHECKPOINT);
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
@@ -2247,6 +2228,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
.err_gc_skipped = true,
+ .no_bg_gc = true,
.nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
@@ -2332,7 +2314,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
- bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
bool no_discard = !test_opt(sbi, DISCARD);
bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
@@ -2382,6 +2363,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto restore_opts;
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) &&
+ sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+#endif
+
/* flush outstanding errors before changing fs state */
flush_work(&sbi->s_error_work);
@@ -2440,12 +2432,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
- err = -EINVAL;
- f2fs_warn(sbi, "switch io_bits option is not allowed");
- goto restore_opts;
- }
-
if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) {
err = -EINVAL;
f2fs_warn(sbi, "switch compress_cache option is not allowed");
@@ -2520,6 +2506,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
}
+ adjust_unusable_cap_perc(sbi);
if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) {
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
@@ -2564,7 +2551,6 @@ skip:
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
- adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
restore_checkpoint:
@@ -2609,6 +2595,11 @@ restore_opts:
return err;
}
+static void f2fs_shutdown(struct super_block *sb)
+{
+ f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false);
+}
+
#ifdef CONFIG_QUOTA
static bool f2fs_need_recovery(struct f2fs_sb_info *sbi)
{
@@ -2722,7 +2713,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
const struct address_space_operations *a_ops = mapping->a_ops;
int offset = off & (sb->s_blocksize - 1);
size_t towrite = len;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err = 0;
int tocopy;
@@ -2732,7 +2723,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
towrite);
retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy,
- &page, &fsdata);
+ &folio, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
@@ -2742,10 +2733,10 @@ retry:
break;
}
- memcpy_to_page(page, offset, data, tocopy);
+ memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
- page, fsdata);
+ folio, fsdata);
offset = 0;
towrite -= tocopy;
off += tocopy;
@@ -2768,7 +2759,7 @@ int f2fs_dquot_initialize(struct inode *inode)
return dquot_initialize(inode);
}
-static struct dquot **f2fs_get_dquots(struct inode *inode)
+static struct dquot __rcu **f2fs_get_dquots(struct inode *inode)
{
return F2FS_I(inode)->i_dquot;
}
@@ -3208,6 +3199,7 @@ static const struct super_operations f2fs_sops = {
.unfreeze_fs = f2fs_unfreeze,
.statfs = f2fs_statfs,
.remount_fs = f2fs_remount,
+ .shutdown = f2fs_shutdown,
};
#ifdef CONFIG_FS_ENCRYPTION
@@ -3362,29 +3354,47 @@ loff_t max_file_blocks(struct inode *inode)
* fit within U32_MAX + 1 data units.
*/
- result = min(result, (((loff_t)U32_MAX + 1) * 4096) >> F2FS_BLKSIZE_BITS);
+ result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096));
return result;
}
-static int __f2fs_commit_super(struct buffer_head *bh,
- struct f2fs_super_block *super)
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio,
+ pgoff_t index, bool update)
{
- lock_buffer(bh);
- if (super)
- memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
- set_buffer_dirty(bh);
- unlock_buffer(bh);
-
+ struct bio *bio;
/* it's rare case, we can do fua all the time */
- return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
+ blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA;
+ int ret;
+
+ folio_lock(folio);
+ folio_wait_writeback(folio);
+ if (update)
+ memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi),
+ sizeof(struct f2fs_super_block));
+ folio_mark_dirty(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+
+ bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS);
+
+ /* it doesn't need to set crypto context for superblock update */
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio));
+
+ if (!bio_add_folio(bio, folio, folio_size(folio), 0))
+ f2fs_bug_on(sbi, 1);
+
+ ret = submit_bio_wait(bio);
+ folio_end_writeback(folio);
+
+ return ret;
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
struct super_block *sb = sbi->sb;
u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
@@ -3400,9 +3410,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
u32 segment_count = le32_to_cpu(raw_super->segment_count);
u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
u64 main_end_blkaddr = main_blkaddr +
- (segment_count_main << log_blocks_per_seg);
+ ((u64)segment_count_main << log_blocks_per_seg);
u64 seg_end_blkaddr = segment0_blkaddr +
- (segment_count << log_blocks_per_seg);
+ ((u64)segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
@@ -3459,7 +3469,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
res = "internally";
} else {
- err = __f2fs_commit_super(bh, NULL);
+ err = __f2fs_commit_super(sbi, folio, index, false);
res = err ? "failed" : "done";
}
f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
@@ -3472,12 +3482,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
}
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
size_t crc_offset = 0;
__u32 crc = 0;
@@ -3503,7 +3512,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
}
- /* Currently, support only 4KB block size */
+ /* only support block_size equals to PAGE_SIZE */
if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) {
f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u",
le32_to_cpu(raw_super->log_blocksize),
@@ -3635,7 +3644,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
- if (sanity_check_area_boundary(sbi, bh))
+ if (sanity_check_area_boundary(sbi, folio, index))
return -EFSCORRUPTED;
return 0;
@@ -3706,7 +3715,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
}
main_segs = le32_to_cpu(raw_super->segment_count_main);
- blocks_per_seg = sbi->blocks_per_seg;
+ blocks_per_seg = BLKS_PER_SEG(sbi);
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
@@ -3818,9 +3827,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
- sbi->total_node_count =
- (le32_to_cpu(raw_super->segment_count_nat) / 2)
- * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
+ sbi->total_node_count = SEGS_TO_BLKS(sbi,
+ ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
+ NAT_ENTRY_PER_BLOCK));
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -3829,7 +3838,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
- sbi->migration_granularity = sbi->segs_per_sec;
+ sbi->migration_granularity = SEGS_PER_SEC(sbi);
+ sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ?
+ DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi);
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
@@ -3924,17 +3935,25 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
sector_t nr_sectors = bdev_nr_sectors(bdev);
struct f2fs_report_zones_args rep_zone_arg;
u64 zone_sectors;
+ unsigned int max_open_zones;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
- zone_sectors = bdev_zone_sectors(bdev);
- if (!is_power_of_2(zone_sectors)) {
- f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
- return -EINVAL;
+ if (bdev_is_zoned(FDEV(devi).bdev)) {
+ max_open_zones = bdev_max_open_zones(bdev);
+ if (max_open_zones && (max_open_zones < sbi->max_open_zones))
+ sbi->max_open_zones = max_open_zones;
+ if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ return -EINVAL;
+ }
}
+ zone_sectors = bdev_zone_sectors(bdev);
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(zone_sectors))
return -EINVAL;
@@ -3974,7 +3993,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
{
struct super_block *sb = sbi->sb;
int block;
- struct buffer_head *bh;
+ struct folio *folio;
struct f2fs_super_block *super;
int err = 0;
@@ -3983,32 +4002,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
return -ENOMEM;
for (block = 0; block < 2; block++) {
- bh = sb_bread(sb, block);
- if (!bh) {
+ folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL);
+ if (IS_ERR(folio)) {
f2fs_err(sbi, "Unable to read %dth superblock",
block + 1);
- err = -EIO;
+ err = PTR_ERR(folio);
*recovery = 1;
continue;
}
/* sanity checking of raw super */
- err = sanity_check_raw_super(sbi, bh);
+ err = sanity_check_raw_super(sbi, folio, block);
if (err) {
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
block + 1);
- brelse(bh);
+ folio_put(folio);
*recovery = 1;
continue;
}
if (!*raw_super) {
- memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
+ memcpy(super, F2FS_SUPER_BLOCK(folio, block),
sizeof(*super));
*valid_super_block = block;
*raw_super = super;
}
- brelse(bh);
+ folio_put(folio);
}
/* No valid superblock */
@@ -4022,7 +4041,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
- struct buffer_head *bh;
+ struct folio *folio;
+ pgoff_t index;
__u32 crc = 0;
int err;
@@ -4040,22 +4060,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* write back-up superblock first */
- bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block ? 0 : 1;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
return err;
/* write current valid superblock */
- bh = sb_bread(sbi->sb, sbi->valid_super_block);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
return err;
}
@@ -4090,7 +4112,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
f2fs_up_write(&sbi->sb_lock);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record stop_reason, err:%d",
+ err);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4133,8 +4157,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
err = f2fs_commit_super(sbi, false);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
- error, err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record errors:%u, err:%d",
+ error, err);
out_unlock:
f2fs_up_write(&sbi->sb_lock);
}
@@ -4162,8 +4187,7 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}
-void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
- bool irq_context)
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason)
{
struct super_block *sb = sbi->sb;
bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
@@ -4175,10 +4199,12 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
if (!f2fs_hw_is_readonly(sbi)) {
save_stop_reason(sbi, reason);
- if (irq_context && !shutdown)
- schedule_work(&sbi->s_error_work);
- else
- f2fs_record_stop_reason(sbi);
+ /*
+ * always create an asynchronous task to record stop_reason
+ * in order to avoid potential deadlock when running into
+ * f2fs_record_stop_reason() synchronously.
+ */
+ schedule_work(&sbi->s_error_work);
}
/*
@@ -4195,17 +4221,25 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
if (shutdown)
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
- /* continue filesystem operators if errors=continue */
- if (continue_fs || f2fs_readonly(sb))
+ /*
+ * Continue filesystem operators if errors=continue. Should not set
+ * RO by shutdown, since RO bypasses thaw_super which can hang the
+ * system.
+ */
+ if (continue_fs || f2fs_readonly(sb) || shutdown) {
+ f2fs_warn(sbi, "Stopped filesystem due to reason: %d", reason);
return;
+ }
f2fs_warn(sbi, "Remounting filesystem read-only");
+
/*
- * Make sure updated value of ->s_mount_flags will be visible before
- * ->s_flags update
+ * We have already set CP_ERROR_FLAG flag to stop all updates
+ * to filesystem, so it doesn't need to set SB_RDONLY flag here
+ * because the flag should be set covered w/ sb->s_umount semaphore
+ * via remount procedure, otherwise, it will confuse code like
+ * freeze_super() which will lead to deadlocks and other problems.
*/
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
}
static void f2fs_record_error_work(struct work_struct *work)
@@ -4216,6 +4250,16 @@ static void f2fs_record_error_work(struct work_struct *work)
f2fs_record_stop_reason(sbi);
}
+static inline unsigned int get_first_zoned_segno(struct f2fs_sb_info *sbi)
+{
+ int devi;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++)
+ if (bdev_is_zoned(FDEV(devi).bdev))
+ return GET_SEGNO(sbi, FDEV(devi).start_blk);
+ return 0;
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4244,10 +4288,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
+#ifdef CONFIG_BLK_DEV_ZONED
+ sbi->max_open_zones = UINT_MAX;
+ sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
+#endif
for (i = 0; i < max_devices; i++) {
if (i == 0)
- FDEV(0).bdev_handle = sbi->sb->s_bdev_handle;
+ FDEV(0).bdev_file = sbi->sb->s_bdev_file;
else if (!RDEV(i).path[0])
break;
@@ -4259,22 +4307,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1 +
- le32_to_cpu(raw_super->segment0_blkaddr);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1;
- FDEV(i).bdev_handle = bdev_open_by_path(
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1;
+ FDEV(i).bdev_file = bdev_file_open_by_path(
FDEV(i).path, mode, sbi->sb, NULL);
}
}
- if (IS_ERR(FDEV(i).bdev_handle))
- return PTR_ERR(FDEV(i).bdev_handle);
+ if (IS_ERR(FDEV(i).bdev_file))
+ return PTR_ERR(FDEV(i).bdev_file);
- FDEV(i).bdev = FDEV(i).bdev_handle->bdev;
+ FDEV(i).bdev = file_bdev(FDEV(i).bdev_file);
/* to release errored devices */
sbi->s_ndevs = i + 1;
@@ -4305,8 +4353,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
- f2fs_info(sbi,
- "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -4418,15 +4464,6 @@ try_onemore:
}
mutex_init(&sbi->flush_lock);
- /* Load the checksum driver */
- sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_err(sbi, "Cannot load crc32 driver.");
- err = PTR_ERR(sbi->s_chksum_driver);
- sbi->s_chksum_driver = NULL;
- goto free_sbi;
- }
-
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_err(sbi, "unable to set blocksize");
@@ -4496,7 +4533,8 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
- memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_sysfs_name_bdev(sb);
sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
@@ -4519,19 +4557,10 @@ try_onemore:
if (err)
goto free_iostat;
- if (F2FS_IO_ALIGNED(sbi)) {
- sbi->write_io_dummy =
- mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
- if (!sbi->write_io_dummy) {
- err = -ENOMEM;
- goto free_percpu;
- }
- }
-
/* init per sbi slab cache */
err = f2fs_init_xattr_caches(sbi);
if (err)
- goto free_io_dummy;
+ goto free_percpu;
err = f2fs_init_page_array_cache(sbi);
if (err)
goto free_xattr_cache;
@@ -4619,13 +4648,12 @@ try_onemore:
goto free_nm;
}
- err = adjust_reserved_segment(sbi);
- if (err)
- goto free_nm;
-
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
+ /* get segno of first zoned block device */
+ sbi->first_zoned_segno = get_first_zoned_segno(sbi);
+
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
if (__exist_node_summaries(sbi))
@@ -4660,6 +4688,7 @@ try_onemore:
goto free_node_inode;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
@@ -4746,19 +4775,23 @@ try_onemore:
reset_checkpoint:
/*
* If the f2fs is not readonly and fsync data recovery succeeds,
- * check zoned block devices' write pointer consistency.
+ * write pointer consistency of cursegs and other zones are already
+ * checked and fixed during recovery. However, if recovery fails,
+ * write pointers are left untouched, and retry-mount should check
+ * them here.
*/
- if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_check_write_pointer(sbi);
- if (err)
- goto free_meta;
- }
-
- f2fs_init_inmem_curseg(sbi);
+ if (skip_recovery)
+ err = f2fs_check_and_fix_write_pointer(sbi);
+ if (err)
+ goto free_meta;
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
+ err = f2fs_init_inmem_curseg(sbi);
+ if (err)
+ goto sync_free_meta;
+
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
@@ -4853,8 +4886,6 @@ free_page_array_cache:
f2fs_destroy_page_array_cache(sbi);
free_xattr_cache:
f2fs_destroy_xattr_caches(sbi);
-free_io_dummy:
- mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
free_iostat:
@@ -4877,8 +4908,6 @@ free_options:
free_sb_buf:
kfree(raw_super);
free_sbi:
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi);
sb->s_fs_info = NULL;
@@ -4967,12 +4996,6 @@ static int __init init_f2fs_fs(void)
{
int err;
- if (PAGE_SIZE != F2FS_BLKSIZE) {
- printk("F2FS not supported on PAGE_SIZE(%lu) != BLOCK_SIZE(%lu)\n",
- PAGE_SIZE, F2FS_BLKSIZE);
- return -EINVAL;
- }
-
err = init_inodecache();
if (err)
goto fail;
@@ -5000,9 +5023,6 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_shrinker();
if (err)
goto free_sysfs;
- err = register_filesystem(&f2fs_fs_type);
- if (err)
- goto free_shrinker;
f2fs_create_root_stats();
err = f2fs_init_post_read_processing();
if (err)
@@ -5025,7 +5045,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_create_casefold_cache();
if (err)
goto free_compress_cache;
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto free_casefold_cache;
return 0;
+free_casefold_cache:
+ f2fs_destroy_casefold_cache();
free_compress_cache:
f2fs_destroy_compress_cache();
free_compress_mempool:
@@ -5040,8 +5065,6 @@ free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
-free_shrinker:
f2fs_exit_shrinker();
free_sysfs:
f2fs_exit_sysfs();
@@ -5065,6 +5088,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ unregister_filesystem(&f2fs_fs_type);
f2fs_destroy_casefold_cache();
f2fs_destroy_compress_cache();
f2fs_destroy_compress_mempool();
@@ -5073,7 +5097,6 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
f2fs_exit_shrinker();
f2fs_exit_sysfs();
f2fs_destroy_garbage_collection_cache();
@@ -5091,5 +5114,3 @@ module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc32");
-