summaryrefslogtreecommitdiff
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/checkpoint.c53
-rw-r--r--fs/f2fs/compress.c43
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/dir.c17
-rw-r--r--fs/f2fs/extent_cache.c15
-rw-r--r--fs/f2fs/f2fs.h94
-rw-r--r--fs/f2fs/file.c49
-rw-r--r--fs/f2fs/gc.c25
-rw-r--r--fs/f2fs/node.c77
-rw-r--r--fs/f2fs/node.h1
-rw-r--r--fs/f2fs/recovery.c2
-rw-r--r--fs/f2fs/segment.c30
-rw-r--r--fs/f2fs/segment.h28
-rw-r--r--fs/f2fs/super.c137
-rw-r--r--fs/f2fs/sysfs.c119
-rw-r--r--fs/f2fs/verity.c2
16 files changed, 590 insertions, 161 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index db3831f7f2f5..bbe07e3a6c75 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1442,6 +1442,34 @@ u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
return get_sectors_written(sbi->sb->s_bdev);
}
+static inline void stat_cp_time(struct cp_control *cpc, enum cp_time type)
+{
+ cpc->stats.times[type] = ktime_get();
+}
+
+static inline void check_cp_time(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ unsigned long long sb_diff, cur_diff;
+ enum cp_time ct;
+
+ sb_diff = (u64)ktime_ms_delta(sbi->cp_stats.times[CP_TIME_END],
+ sbi->cp_stats.times[CP_TIME_START]);
+ cur_diff = (u64)ktime_ms_delta(cpc->stats.times[CP_TIME_END],
+ cpc->stats.times[CP_TIME_START]);
+
+ if (cur_diff > sb_diff) {
+ sbi->cp_stats = cpc->stats;
+ if (cur_diff < CP_LONG_LATENCY_THRESHOLD)
+ return;
+
+ f2fs_warn(sbi, "checkpoint was blocked for %llu ms", cur_diff);
+ for (ct = CP_TIME_START; ct < CP_TIME_MAX - 1; ct++)
+ f2fs_warn(sbi, "Step#%d: %llu ms", ct,
+ (u64)ktime_ms_delta(cpc->stats.times[ct + 1],
+ cpc->stats.times[ct]));
+ }
+}
+
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1459,6 +1487,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ stat_cp_time(cpc, CP_TIME_SYNC_META);
+
/* start to update checkpoint, cp ver is already updated previously */
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
@@ -1555,20 +1585,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
+ stat_cp_time(cpc, CP_TIME_SYNC_CP_META);
+
/* Wait for all dirty meta pages to be submitted for IO */
f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
+ stat_cp_time(cpc, CP_TIME_WAIT_DIRTY_META);
/* wait for previous submitted meta pages writeback */
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
+ stat_cp_time(cpc, CP_TIME_WAIT_CP_DATA);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
if (err)
return err;
+ stat_cp_time(cpc, CP_TIME_FLUSH_DEVICE);
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
+ stat_cp_time(cpc, CP_TIME_WAIT_LAST_CP);
/*
* invalidate intermediate page cache borrowed from meta inode which are
@@ -1613,6 +1649,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long long ckpt_ver;
int err = 0;
+ stat_cp_time(cpc, CP_TIME_START);
+
if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
return -EROFS;
@@ -1624,6 +1662,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (cpc->reason != CP_RESIZE)
f2fs_down_write(&sbi->cp_global_sem);
+ stat_cp_time(cpc, CP_TIME_LOCK);
+
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
@@ -1639,6 +1679,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (err)
goto out;
+ stat_cp_time(cpc, CP_TIME_OP_LOCK);
+
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
f2fs_flush_merged_writes(sbi);
@@ -1678,6 +1720,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_sit_entries(sbi, cpc);
+ stat_cp_time(cpc, CP_TIME_FLUSH_META);
+
/* save inmem log status */
f2fs_save_inmem_curseg(sbi);
@@ -1695,6 +1739,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
stat_inc_cp_count(sbi);
stop:
unblock_operations(sbi);
+ stat_cp_time(cpc, CP_TIME_END);
+ check_cp_time(sbi, cpc);
if (cpc->reason & CP_RECOVERY)
f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
@@ -1778,6 +1824,7 @@ static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
req->ret = ret;
+ req->delta_time = diff;
complete(&req->wait);
sum_diff += diff;
@@ -1873,6 +1920,12 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
else
flush_remained_ckpt_reqs(sbi, &req);
+ if (unlikely(req.delta_time >= CP_LONG_LATENCY_THRESHOLD)) {
+ f2fs_warn_ratelimited(sbi,
+ "blocked on checkpoint for %u ms", cprc->peak_time);
+ dump_stack();
+ }
+
return req.ret;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 5c1f47e45dab..6ad8d3bc6df7 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1215,9 +1215,11 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
{
void *fsdata = NULL;
struct page *pagep;
+ struct page **rpages;
int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
log_cluster_size;
+ int i;
int err;
err = f2fs_is_compressed_cluster(inode, start_idx);
@@ -1238,27 +1240,30 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
if (err <= 0)
return err;
- if (err > 0) {
- struct page **rpages = fsdata;
- int cluster_size = F2FS_I(inode)->i_cluster_size;
- int i;
-
- for (i = cluster_size - 1; i >= 0; i--) {
- struct folio *folio = page_folio(rpages[i]);
- loff_t start = folio->index << PAGE_SHIFT;
-
- if (from <= start) {
- folio_zero_segment(folio, 0, folio_size(folio));
- } else {
- folio_zero_segment(folio, from - start,
- folio_size(folio));
- break;
- }
- }
+ rpages = fsdata;
+
+ for (i = (1 << log_cluster_size) - 1; i >= 0; i--) {
+ struct folio *folio = page_folio(rpages[i]);
+ loff_t start = (loff_t)folio->index << PAGE_SHIFT;
+ loff_t offset = from > start ? from - start : 0;
- f2fs_compress_write_end(inode, fsdata, start_idx, true);
+ folio_zero_segment(folio, offset, folio_size(folio));
+
+ if (from >= start)
+ break;
}
- return 0;
+
+ f2fs_compress_write_end(inode, fsdata, start_idx, true);
+
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ round_down(from, 1 << log_cluster_size << PAGE_SHIFT),
+ LLONG_MAX);
+ if (err)
+ return err;
+
+ truncate_pagecache(inode, from);
+
+ return f2fs_do_truncate_blocks(inode, round_up(from, PAGE_SIZE), lock);
}
static int f2fs_write_compressed_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7961e0ddfca3..ef38e62cda8f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -733,9 +733,11 @@ static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
struct f2fs_io_info *fio)
{
+ blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA);
+
if (io->fio.op != fio->op)
return false;
- return io->fio.op_flags == fio->op_flags;
+ return (io->fio.op_flags & mask) == (fio->op_flags & mask);
}
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
@@ -911,7 +913,7 @@ alloc_new:
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
- inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1083,7 +1085,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
}
/* This can handle encryption stuffs */
-static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
+static void f2fs_submit_page_read(struct inode *inode, struct folio *folio,
block_t blkaddr, blk_opf_t op_flags,
bool for_write)
{
@@ -1092,23 +1094,16 @@ static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
folio->index, for_write);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, blkaddr);
- if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
- iostat_update_and_unbind_ctx(bio);
- if (bio->bi_private)
- mempool_free(bio->bi_private, bio_post_read_ctx_pool);
- bio_put(bio);
- return -EFAULT;
- }
+ if (!bio_add_folio(bio, folio, PAGE_SIZE, 0))
+ f2fs_bug_on(sbi, 1);
+
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_submit_read_bio(sbi, bio, DATA);
- return 0;
}
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
@@ -1265,10 +1260,8 @@ got_it:
return folio;
}
- err = f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
+ f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
op_flags, for_write);
- if (err)
- goto put_err;
return folio;
put_err:
@@ -1572,6 +1565,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
pgofs = (pgoff_t)map->m_lblk;
end = pgofs + maxblocks;
+ if (flag == F2FS_GET_BLOCK_PRECACHE)
+ mode = LOOKUP_NODE_RA;
+
next_dnode:
if (map->m_may_create) {
if (f2fs_lfs_mode(sbi))
@@ -1778,12 +1774,13 @@ sync_out:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
- f2fs_update_read_extent_cache_range(&dn,
- start_pgofs, map->m_pblk + ofs,
- map->m_len - ofs);
+ if (map->m_len > ofs)
+ f2fs_update_read_extent_cache_range(&dn,
+ start_pgofs, map->m_pblk + ofs,
+ map->m_len - ofs);
}
if (map->m_next_extent)
- *map->m_next_extent = pgofs + 1;
+ *map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
}
f2fs_put_dnode(&dn);
unlock_out:
@@ -2145,16 +2142,10 @@ submit_and_realloc:
f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
- if (bio == NULL) {
+ if (bio == NULL)
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
f2fs_ra_op_flags(rac), index,
false);
- if (IS_ERR(bio)) {
- ret = PTR_ERR(bio);
- bio = NULL;
- goto out;
- }
- }
/*
* If the page is under writeback, we need to wait for
@@ -2303,18 +2294,10 @@ submit_and_realloc:
bio = NULL;
}
- if (!bio) {
+ if (!bio)
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i,
f2fs_ra_op_flags(rac),
folio->index, for_write);
- if (IS_ERR(bio)) {
- ret = PTR_ERR(bio);
- f2fs_decompress_end_io(dic, ret, true);
- f2fs_put_dnode(&dn);
- *bio_ret = NULL;
- return ret;
- }
- }
if (!bio_add_folio(bio, folio, blocksize, 0))
goto submit_and_realloc;
@@ -3639,11 +3622,9 @@ repeat:
err = -EFSCORRUPTED;
goto put_folio;
}
- err = f2fs_submit_page_read(use_cow ?
+ f2fs_submit_page_read(use_cow ?
F2FS_I(inode)->cow_inode : inode,
folio, blkaddr, 0, true);
- if (err)
- goto put_folio;
folio_lock(folio);
if (unlikely(folio->mapping != mapping)) {
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fffd7749d6d1..48f4f98afb01 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -16,6 +16,21 @@
#include "xattr.h"
#include <trace/events/f2fs.h>
+static inline bool f2fs_should_fallback_to_linear(struct inode *dir)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ switch (F2FS_OPTION(sbi).lookup_mode) {
+ case LOOKUP_PERF:
+ return false;
+ case LOOKUP_COMPAT:
+ return true;
+ case LOOKUP_AUTO:
+ return !sb_no_casefold_compat_fallback(sbi->sb);
+ }
+ return false;
+}
+
#if IS_ENABLED(CONFIG_UNICODE)
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
@@ -366,7 +381,7 @@ start_find_entry:
out:
#if IS_ENABLED(CONFIG_UNICODE)
- if (!sb_no_casefold_compat_fallback(dir->i_sb) &&
+ if (f2fs_should_fallback_to_linear(dir) &&
IS_CASEFOLDED(dir) && !de && use_hash) {
use_hash = false;
goto start_find_entry;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 199c1e7a83ef..33e09c453c70 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -604,7 +604,13 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
p = &(*p)->rb_right;
leftmost = false;
} else {
+ f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, "
+ "extent node in rb tree [%u, %u, %u], age [%llu, %llu], "
+ "extent node to insert [%u, %u, %u], age [%llu, %llu]",
+ __func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age,
+ en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks);
f2fs_bug_on(sbi, 1);
+ return NULL;
}
}
@@ -664,6 +670,15 @@ static void __update_extent_tree_range(struct inode *inode,
if (!et)
return;
+ if (unlikely(len == 0)) {
+ f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, "
+ "extent [%u, %u, %u], age [%llu, %llu]",
+ __func__, type, tei->fofs, tei->blk, tei->len,
+ tei->age, tei->last_blocks);
+ f2fs_bug_on(sbi, 1);
+ return;
+ }
+
if (type == EX_READ)
trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
tei->blk, 0);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 46be7560548c..5b4e9548a231 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -131,6 +131,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
* string rather than using the MS_LAZYTIME flag, so this must remain.
*/
#define F2FS_MOUNT_LAZYTIME 0x40000000
+#define F2FS_MOUNT_RESERVE_NODE 0x80000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -155,6 +156,18 @@ enum blkzone_allocation_policy {
BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */
};
+enum bggc_io_aware_policy {
+ AWARE_ALL_IO, /* skip background GC if there is any kind of pending IO */
+ AWARE_READ_IO, /* skip background GC if there is pending read IO */
+ AWARE_NONE, /* don't aware IO for background GC */
+};
+
+enum device_allocation_policy {
+ ALLOCATE_FORWARD_NOHINT,
+ ALLOCATE_FORWARD_WITHIN_HINT,
+ ALLOCATE_FORWARD_FROM_HINT,
+};
+
/*
* An implementation of an rwsem that is explicitly unfair to readers. This
* prevents priority inversion when a low-priority reader acquires the read lock
@@ -172,6 +185,7 @@ struct f2fs_rwsem {
struct f2fs_mount_info {
unsigned int opt;
block_t root_reserved_blocks; /* root reserved blocks */
+ block_t root_reserved_nodes; /* root reserved nodes */
kuid_t s_resuid; /* reserved blocks for uid */
kgid_t s_resgid; /* reserved blocks for gid */
int active_logs; /* # of active logs */
@@ -212,6 +226,7 @@ struct f2fs_mount_info {
int compress_mode; /* compression mode */
unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
+ unsigned int lookup_mode;
};
#define F2FS_FEATURE_ENCRYPT 0x00000001
@@ -266,14 +281,36 @@ enum {
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_ENABLE_INTERVAL 16 /* 16 secs */
#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
+enum cp_time {
+ CP_TIME_START, /* begin */
+ CP_TIME_LOCK, /* after cp_global_sem */
+ CP_TIME_OP_LOCK, /* after block_operation */
+ CP_TIME_FLUSH_META, /* after flush sit/nat */
+ CP_TIME_SYNC_META, /* after sync_meta_pages */
+ CP_TIME_SYNC_CP_META, /* after sync cp meta pages */
+ CP_TIME_WAIT_DIRTY_META,/* after wait on dirty meta */
+ CP_TIME_WAIT_CP_DATA, /* after wait on cp data */
+ CP_TIME_FLUSH_DEVICE, /* after flush device cache */
+ CP_TIME_WAIT_LAST_CP, /* after wait on last cp pack */
+ CP_TIME_END, /* after unblock_operation */
+ CP_TIME_MAX,
+};
+
+/* time cost stats of checkpoint */
+struct cp_stats {
+ ktime_t times[CP_TIME_MAX];
+};
+
struct cp_control {
int reason;
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
+ struct cp_stats stats;
};
/*
@@ -334,7 +371,10 @@ struct ckpt_req {
struct completion wait; /* completion for checkpoint done */
struct llist_node llnode; /* llist_node to be linked in wait queue */
int ret; /* return code of checkpoint */
- ktime_t queue_time; /* request queued time */
+ union {
+ ktime_t queue_time; /* request queued time */
+ ktime_t delta_time; /* time in queue */
+ };
};
struct ckpt_req_control {
@@ -350,6 +390,9 @@ struct ckpt_req_control {
unsigned int peak_time; /* peak wait time in msec until now */
};
+/* a time threshold that checkpoint was blocked for, unit: ms */
+#define CP_LONG_LATENCY_THRESHOLD 5000
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -907,6 +950,12 @@ struct f2fs_inode_info {
unsigned int atomic_write_cnt;
loff_t original_i_size; /* original i_size before atomic write */
+#ifdef CONFIG_FS_ENCRYPTION
+ struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info */
+#endif
+#ifdef CONFIG_FS_VERITY
+ struct fsverity_info *i_verity_info; /* filesystem verity info */
+#endif
};
static inline void get_read_extent_info(struct extent_info *ext,
@@ -1369,6 +1418,7 @@ enum {
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
+ ENABLE_TIME,
UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
@@ -1448,6 +1498,12 @@ enum {
TOTAL_CALL = FOREGROUND,
};
+enum f2fs_lookup_mode {
+ LOOKUP_PERF,
+ LOOKUP_COMPAT,
+ LOOKUP_AUTO,
+};
+
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1637,6 +1693,7 @@ struct f2fs_sb_info {
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
struct ckpt_req_control cprc_info; /* for checkpoint request control */
+ struct cp_stats cp_stats; /* for time stat of checkpoint */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -1804,6 +1861,9 @@ struct f2fs_sb_info {
spinlock_t dev_lock; /* protect dirty_device */
bool aligned_blksize; /* all devices has the same logical blksize */
unsigned int first_seq_zone_segno; /* first segno in sequential zone */
+ unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */
+ unsigned int allocate_section_hint; /* the boundary position between devices */
+ unsigned int allocate_section_policy; /* determine the section writing priority */
/* For write statistics */
u64 sectors_written_start;
@@ -2356,13 +2416,11 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
return ofs == XATTR_NODE_OFFSET;
}
-static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
+static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi,
struct inode *inode, bool cap)
{
if (!inode)
return true;
- if (!test_opt(sbi, RESERVE_ROOT))
- return false;
if (IS_NOQUOTA(inode))
return true;
if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
@@ -2383,7 +2441,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
- if (!__allow_reserved_blocks(sbi, inode, cap))
+ if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
@@ -2741,7 +2799,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count;
+ unsigned int valid_node_count, avail_user_node_count;
unsigned int avail_user_block_count;
int err;
@@ -2763,15 +2821,20 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
- avail_user_block_count = get_available_block_count(sbi, inode, false);
+ avail_user_block_count = get_available_block_count(sbi, inode,
+ test_opt(sbi, RESERVE_NODE));
if (unlikely(valid_block_count > avail_user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
+ avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+ if (test_opt(sbi, RESERVE_NODE) &&
+ !__allow_reserved_root(sbi, inode, true))
+ avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes;
valid_node_count = sbi->total_valid_node_count + 1;
- if (unlikely(valid_node_count > sbi->total_node_count)) {
+ if (unlikely(valid_node_count > avail_user_node_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -2998,13 +3061,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
- if (zoned_gc) {
- if (is_inflight_read_io(sbi))
- return false;
- } else {
- if (is_inflight_io(sbi, type))
- return false;
- }
+ if (sbi->bggc_io_aware == AWARE_READ_IO && is_inflight_read_io(sbi))
+ return false;
+ if (sbi->bggc_io_aware == AWARE_ALL_IO && is_inflight_io(sbi, type))
+ return false;
if (sbi->gc_mode == GC_URGENT_MID)
return true;
@@ -3764,6 +3824,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
* node.c
*/
struct node_info;
+enum node_type;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
@@ -3786,7 +3847,8 @@ int f2fs_remove_inode_page(struct inode *inode);
struct folio *f2fs_new_inode_folio(struct inode *inode);
struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs);
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
-struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid);
+struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
+ enum node_type node_type);
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
int f2fs_move_node_folio(struct folio *node_folio, int gc_type);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 42faaed6a02d..ffa045b39c01 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,15 +35,23 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
-static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+static void f2fs_zero_post_eof_page(struct inode *inode,
+ loff_t new_size, bool lock)
{
loff_t old_size = i_size_read(inode);
if (old_size >= new_size)
return;
+ if (mapping_empty(inode->i_mapping))
+ return;
+
+ if (lock)
+ filemap_invalidate_lock(inode->i_mapping);
/* zero or drop pages only in range of [old_size, new_size] */
- truncate_pagecache(inode, old_size);
+ truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
+ if (lock)
+ filemap_invalidate_unlock(inode->i_mapping);
}
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
@@ -114,9 +122,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
@@ -904,8 +910,16 @@ int f2fs_truncate(struct inode *inode)
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
- if (err)
+ if (err) {
+ /*
+ * Always truncate page #0 to avoid page cache
+ * leak in evict() path.
+ */
+ truncate_inode_pages_range(inode->i_mapping,
+ F2FS_BLK_TO_BYTES(0),
+ F2FS_BLK_END_BYTES(0));
return err;
+ }
}
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
@@ -1141,7 +1155,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
filemap_invalidate_lock(inode->i_mapping);
if (attr->ia_size > old_size)
- f2fs_zero_post_eof_page(inode, attr->ia_size);
+ f2fs_zero_post_eof_page(inode, attr->ia_size, false);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1260,9 +1274,7 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1547,7 +1559,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@@ -1670,9 +1682,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1806,7 +1816,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1864,9 +1874,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
f2fs_balance_fs(sbi, true);
@@ -4914,9 +4922,8 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode,
+ iocb->ki_pos + iov_iter_count(from), true);
return count;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 098e9f71421e..a7708cf80c04 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1071,7 +1071,7 @@ next_step:
}
/* phase == 2 */
- node_folio = f2fs_get_node_folio(sbi, nid);
+ node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
continue;
@@ -1145,7 +1145,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
- node_folio = f2fs_get_node_folio(sbi, nid);
+ node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
return false;
@@ -1794,6 +1794,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi),
GET_SUM_BLOCK(sbi, segno));
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) {
+ f2fs_err(sbi, "%s: segment %u is used by log",
+ __func__, segno);
+ f2fs_bug_on(sbi, 1);
+ goto skip;
+ }
+
if (get_valid_blocks(sbi, segno, false) == 0)
goto freed;
if (gc_type == BG_GC && __is_large_section(sbi) &&
@@ -1805,7 +1812,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
sum = folio_address(sum_folio);
if (type != GET_SUM_TYPE((&sum->footer))) {
- f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA",
segno, type, GET_SUM_TYPE((&sum->footer)));
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_SUMMARY);
@@ -2068,6 +2075,13 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
+ /*
+ * avoid migrating empty section, as it can be allocated by
+ * log in parallel.
+ */
+ if (!get_valid_blocks(sbi, segno, true))
+ continue;
+
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
continue;
@@ -2182,6 +2196,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
MAIN_SECS(sbi) += secs;
+ if (sbi->allocate_section_hint > MAIN_SECS(sbi))
+ sbi->allocate_section_hint = MAIN_SECS(sbi);
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
@@ -2189,6 +2205,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
+ sbi->allocate_section_hint = FDEV(0).total_segments /
+ SEGS_PER_SEC(sbi);
+
FDEV(last_dev).total_segments =
(int)FDEV(last_dev).total_segments + segs;
FDEV(last_dev).end_blk =
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 27743b93e186..482a362f2625 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -27,12 +27,17 @@ static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
static struct kmem_cache *fsync_node_entry_slab;
+static inline bool is_invalid_nid(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ return nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid;
+}
+
/*
* Check whether the given nid is within node id range.
*/
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
- if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
+ if (unlikely(is_invalid_nid(sbi, nid))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
__func__, nid);
@@ -871,7 +876,8 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (!done) {
- nfolio[i] = f2fs_get_node_folio(sbi, nids[i]);
+ nfolio[i] = f2fs_get_node_folio(sbi, nids[i],
+ NODE_TYPE_NON_INODE);
if (IS_ERR(nfolio[i])) {
err = PTR_ERR(nfolio[i]);
f2fs_folio_put(nfolio[0], false);
@@ -989,7 +995,7 @@ static int truncate_dnode(struct dnode_of_data *dn)
return 1;
/* get direct node */
- folio = f2fs_get_node_folio(sbi, dn->nid);
+ folio = f2fs_get_node_folio(sbi, dn->nid, NODE_TYPE_NON_INODE);
if (PTR_ERR(folio) == -ENOENT)
return 1;
else if (IS_ERR(folio))
@@ -1033,7 +1039,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
- folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid);
+ folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid,
+ NODE_TYPE_NON_INODE);
if (IS_ERR(folio)) {
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio));
return PTR_ERR(folio);
@@ -1111,7 +1118,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
/* get indirect nodes in the path */
for (i = 0; i < idx + 1; i++) {
/* reference count'll be increased */
- folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i]);
+ folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i],
+ NODE_TYPE_NON_INODE);
if (IS_ERR(folios[i])) {
err = PTR_ERR(folios[i]);
idx = i - 1;
@@ -1496,21 +1504,37 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi,
struct folio *folio, pgoff_t nid,
enum node_type ntype)
{
- if (unlikely(nid != nid_of_node(folio) ||
- (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) ||
- (ntype == NODE_TYPE_XATTR &&
- !f2fs_has_xattr_block(ofs_of_node(folio))) ||
- time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) {
- f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
- "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- ntype, nid, nid_of_node(folio), ino_of_node(folio),
- ofs_of_node(folio), cpver_of_node(folio),
- next_blkaddr_of_node(folio));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
- return -EFSCORRUPTED;
+ if (unlikely(nid != nid_of_node(folio)))
+ goto out_err;
+
+ switch (ntype) {
+ case NODE_TYPE_INODE:
+ if (!IS_INODE(folio))
+ goto out_err;
+ break;
+ case NODE_TYPE_XATTR:
+ if (!f2fs_has_xattr_block(ofs_of_node(folio)))
+ goto out_err;
+ break;
+ case NODE_TYPE_NON_INODE:
+ if (IS_INODE(folio))
+ goto out_err;
+ break;
+ default:
+ break;
}
+ if (time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))
+ goto out_err;
return 0;
+out_err:
+ f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
+ "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ ntype, nid, nid_of_node(folio), ino_of_node(folio),
+ ofs_of_node(folio), cpver_of_node(folio),
+ next_blkaddr_of_node(folio));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
+ return -EFSCORRUPTED;
}
static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
@@ -1546,7 +1570,7 @@ repeat:
if (unlikely(!folio_test_uptodate(folio))) {
err = -EIO;
- goto out_err;
+ goto out_put_err;
}
if (!f2fs_inode_chksum_verify(sbi, folio)) {
@@ -1567,9 +1591,10 @@ out_put_err:
return ERR_PTR(err);
}
-struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid)
+struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
+ enum node_type node_type)
{
- return __get_node_folio(sbi, nid, NULL, 0, NODE_TYPE_REGULAR);
+ return __get_node_folio(sbi, nid, NULL, 0, node_type);
}
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino)
@@ -2634,6 +2659,16 @@ retry:
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
+
+ if (unlikely(is_invalid_nid(sbi, i->nid))) {
+ spin_unlock(&nm_i->nid_list_lock);
+ f2fs_err(sbi, "Corrupted nid %u in free_nid_list",
+ i->nid);
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_CORRUPTED_NID);
+ return false;
+ }
+
*nid = i->nid;
__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 030390543b54..9cb8dcf8d417 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -57,6 +57,7 @@ enum node_type {
NODE_TYPE_REGULAR,
NODE_TYPE_INODE,
NODE_TYPE_XATTR,
+ NODE_TYPE_NON_INODE,
};
/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4cb3a91801b4..215e442db72c 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -548,7 +548,7 @@ got_it:
}
/* Get the node page */
- node_folio = f2fs_get_node_folio(sbi, nid);
+ node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index cc82d42ef14c..b45eace879d7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2774,6 +2774,8 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
+ unsigned int alloc_policy = sbi->allocate_section_policy;
+ unsigned int alloc_hint = sbi->allocate_section_hint;
bool init = true;
int i;
int ret = 0;
@@ -2807,6 +2809,21 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
}
#endif
+ /*
+ * Prevent allocate_section_hint from exceeding MAIN_SECS()
+ * due to desynchronization.
+ */
+ if (alloc_policy != ALLOCATE_FORWARD_NOHINT &&
+ alloc_hint > MAIN_SECS(sbi))
+ alloc_hint = MAIN_SECS(sbi);
+
+ if (alloc_policy == ALLOCATE_FORWARD_FROM_HINT &&
+ hint < alloc_hint)
+ hint = alloc_hint;
+ else if (alloc_policy == ALLOCATE_FORWARD_WITHIN_HINT &&
+ hint >= alloc_hint)
+ hint = 0;
+
find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
@@ -3672,7 +3689,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- f2fs_is_cow_file(inode))
+ f2fs_is_cow_file(inode) ||
+ is_inode_flag_set(inode, FI_NEED_IPU))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
@@ -3936,12 +3954,18 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
int seg_type = log_type_to_seg_type(type);
bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
seg_type == CURSEG_COLD_DATA);
+ int err;
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
- if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio)) {
+ err = f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
+ &fio->new_blkaddr, sum, type, fio);
+ if (unlikely(err)) {
+ f2fs_err_ratelimited(fio->sbi,
+ "%s Failed to allocate data block, ino:%u, index:%lu, type:%d, old_blkaddr:0x%x, new_blkaddr:0x%x, err:%d",
+ __func__, fio->ino, folio->index, type,
+ fio->old_blkaddr, fio->new_blkaddr, err);
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
folio_end_writeback(folio);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5e2ee5c686b1..1ce2c8abaf48 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -600,6 +600,16 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
+static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi,
+ enum log_type type, unsigned int segno)
+{
+ if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
+ return CAP_BLKS_PER_SEC(sbi) - SEGS_TO_BLKS(sbi,
+ (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
+ CURSEG_I(sbi, type)->next_blkoff;
+ return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true);
+}
+
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
unsigned int node_blocks, unsigned int data_blocks,
unsigned int dent_blocks)
@@ -614,14 +624,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
if (unlikely(segno == NULL_SEGNO))
return false;
- if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
- left_blocks = CAP_BLKS_PER_SEC(sbi) -
- SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
- CURSEG_I(sbi, i)->next_blkoff;
- } else {
- left_blocks = CAP_BLKS_PER_SEC(sbi) -
- get_ckpt_valid_blocks(sbi, segno, true);
- }
+ left_blocks = get_left_section_blocks(sbi, i, segno);
blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks;
if (blocks > left_blocks)
@@ -634,14 +637,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
if (unlikely(segno == NULL_SEGNO))
return false;
- if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
- left_blocks = CAP_BLKS_PER_SEC(sbi) -
- SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
- CURSEG_I(sbi, CURSEG_HOT_DATA)->next_blkoff;
- } else {
- left_blocks = CAP_BLKS_PER_SEC(sbi) -
- get_ckpt_valid_blocks(sbi, segno, true);
- }
+ left_blocks = get_left_section_blocks(sbi, CURSEG_HOT_DATA, segno);
if (dent_blocks > left_blocks)
return false;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e16c4e2830c2..fd8e7b0b2166 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -143,6 +143,7 @@ enum {
Opt_extent_cache,
Opt_data_flush,
Opt_reserve_root,
+ Opt_reserve_node,
Opt_resgid,
Opt_resuid,
Opt_mode,
@@ -181,6 +182,7 @@ enum {
Opt_nat_bits,
Opt_jqfmt,
Opt_checkpoint,
+ Opt_lookup_mode,
Opt_err,
};
@@ -244,6 +246,13 @@ static const struct constant_table f2fs_param_errors[] = {
{}
};
+static const struct constant_table f2fs_param_lookup_mode[] = {
+ {"perf", LOOKUP_PERF},
+ {"compat", LOOKUP_COMPAT},
+ {"auto", LOOKUP_AUTO},
+ {}
+};
+
static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc),
fsparam_flag("disable_roll_forward", Opt_disable_roll_forward),
@@ -265,6 +274,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_flag_no("extent_cache", Opt_extent_cache),
fsparam_flag("data_flush", Opt_data_flush),
fsparam_u32("reserve_root", Opt_reserve_root),
+ fsparam_u32("reserve_node", Opt_reserve_node),
fsparam_gid("resgid", Opt_resgid),
fsparam_uid("resuid", Opt_resuid),
fsparam_enum("mode", Opt_mode, f2fs_param_mode),
@@ -300,6 +310,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode),
fsparam_flag("age_extent_cache", Opt_age_extent_cache),
fsparam_enum("errors", Opt_errors, f2fs_param_errors),
+ fsparam_enum("lookup_mode", Opt_lookup_mode, f2fs_param_lookup_mode),
{}
};
@@ -336,6 +347,8 @@ static match_table_t f2fs_checkpoint_tokens = {
#define F2FS_SPEC_discard_unit (1 << 21)
#define F2FS_SPEC_memory_mode (1 << 22)
#define F2FS_SPEC_errors (1 << 23)
+#define F2FS_SPEC_lookup_mode (1 << 24)
+#define F2FS_SPEC_reserve_node (1 << 25)
struct f2fs_fs_context {
struct f2fs_mount_info info;
@@ -437,22 +450,30 @@ static void f2fs_destroy_casefold_cache(void) { }
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = min((sbi->user_block_count >> 3),
+ block_t block_limit = min((sbi->user_block_count >> 3),
sbi->user_block_count - sbi->reserved_blocks);
+ block_t node_limit = sbi->total_node_count >> 3;
/* limit is 12.5% */
if (test_opt(sbi, RESERVE_ROOT) &&
- F2FS_OPTION(sbi).root_reserved_blocks > limit) {
- F2FS_OPTION(sbi).root_reserved_blocks = limit;
+ F2FS_OPTION(sbi).root_reserved_blocks > block_limit) {
+ F2FS_OPTION(sbi).root_reserved_blocks = block_limit;
f2fs_info(sbi, "Reduce reserved blocks for root = %u",
F2FS_OPTION(sbi).root_reserved_blocks);
}
- if (!test_opt(sbi, RESERVE_ROOT) &&
+ if (test_opt(sbi, RESERVE_NODE) &&
+ F2FS_OPTION(sbi).root_reserved_nodes > node_limit) {
+ F2FS_OPTION(sbi).root_reserved_nodes = node_limit;
+ f2fs_info(sbi, "Reduce reserved nodes for root = %u",
+ F2FS_OPTION(sbi).root_reserved_nodes);
+ }
+ if (!test_opt(sbi, RESERVE_ROOT) && !test_opt(sbi, RESERVE_NODE) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
- f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root"
+ " and reserve_node",
from_kuid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
@@ -480,6 +501,12 @@ static void init_once(void *foo)
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
inode_init_once(&fi->vfs_inode);
+#ifdef CONFIG_FS_ENCRYPTION
+ fi->i_crypt_info = NULL;
+#endif
+#ifdef CONFIG_FS_VERITY
+ fi->i_verity_info = NULL;
+#endif
}
#ifdef CONFIG_QUOTA
@@ -841,6 +868,11 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
ctx->spec_mask |= F2FS_SPEC_reserve_root;
break;
+ case Opt_reserve_node:
+ ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
+ F2FS_CTX_INFO(ctx).root_reserved_nodes = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_reserve_node;
+ break;
case Opt_resuid:
F2FS_CTX_INFO(ctx).s_resuid = result.uid;
ctx->spec_mask |= F2FS_SPEC_resuid;
@@ -988,6 +1020,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
+ F2FS_CTX_INFO(ctx).unusable_cap_perc = 0;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
+ F2FS_CTX_INFO(ctx).unusable_cap = 0;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
default:
@@ -1143,6 +1179,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_nat_bits:
ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS);
break;
+ case Opt_lookup_mode:
+ F2FS_CTX_INFO(ctx).lookup_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_lookup_mode;
+ break;
}
return 0;
}
@@ -1185,7 +1225,11 @@ static int f2fs_check_quota_consistency(struct fs_context *fc,
goto err_jquota_change;
if (old_qname) {
- if (strcmp(old_qname, new_qname) == 0) {
+ if (!new_qname) {
+ f2fs_info(sbi, "remove qf_name %s",
+ old_qname);
+ continue;
+ } else if (strcmp(old_qname, new_qname) == 0) {
ctx->qname_mask &= ~(1 << i);
continue;
}
@@ -1424,6 +1468,14 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
}
+ if (test_opt(sbi, RESERVE_NODE) &&
+ (ctx->opt_mask & F2FS_MOUNT_RESERVE_NODE) &&
+ ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) {
+ f2fs_info(sbi, "Preserve previous reserve_node=%u",
+ F2FS_OPTION(sbi).root_reserved_nodes);
+ ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
+ ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_NODE;
+ }
err = f2fs_check_test_dummy_encryption(fc, sb);
if (err)
@@ -1623,6 +1675,9 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
if (ctx->spec_mask & F2FS_SPEC_reserve_root)
F2FS_OPTION(sbi).root_reserved_blocks =
F2FS_CTX_INFO(ctx).root_reserved_blocks;
+ if (ctx->spec_mask & F2FS_SPEC_reserve_node)
+ F2FS_OPTION(sbi).root_reserved_nodes =
+ F2FS_CTX_INFO(ctx).root_reserved_nodes;
if (ctx->spec_mask & F2FS_SPEC_resgid)
F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
if (ctx->spec_mask & F2FS_SPEC_resuid)
@@ -1652,6 +1707,8 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode;
if (ctx->spec_mask & F2FS_SPEC_errors)
F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors;
+ if (ctx->spec_mask & F2FS_SPEC_lookup_mode)
+ F2FS_OPTION(sbi).lookup_mode = F2FS_CTX_INFO(ctx).lookup_mode;
f2fs_apply_compression(fc, sb);
f2fs_apply_test_dummy_encryption(fc, sb);
@@ -1744,7 +1801,7 @@ static int f2fs_drop_inode(struct inode *inode)
if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
if (!inode->i_nlink && !is_bad_inode(inode)) {
/* to avoid evict_inode call simultaneously */
- atomic_inc(&inode->i_count);
+ __iget(inode);
spin_unlock(&inode->i_lock);
/* should remain fi->extent_tree for writepage */
@@ -1763,12 +1820,12 @@ static int f2fs_drop_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
spin_lock(&inode->i_lock);
- atomic_dec(&inode->i_count);
+ iput(inode);
}
trace_f2fs_drop_inode(inode, 0);
return 0;
}
- ret = generic_drop_inode(inode);
+ ret = inode_generic_drop(inode);
if (!ret)
ret = fscrypt_drop_inode(inode);
trace_f2fs_drop_inode(inode, ret);
@@ -2343,9 +2400,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
seq_puts(seq, "fragment:block");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
- if (test_opt(sbi, RESERVE_ROOT))
- seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
+ if (test_opt(sbi, RESERVE_ROOT) || test_opt(sbi, RESERVE_NODE))
+ seq_printf(seq, ",reserve_root=%u,reserve_node=%u,resuid=%u,"
+ "resgid=%u",
F2FS_OPTION(sbi).root_reserved_blocks,
+ F2FS_OPTION(sbi).root_reserved_nodes,
from_kuid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
@@ -2416,6 +2475,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, NAT_BITS))
seq_puts(seq, ",nat_bits");
+ if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_PERF)
+ seq_show_option(seq, "lookup_mode", "perf");
+ else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_COMPAT)
+ seq_show_option(seq, "lookup_mode", "compat");
+ else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_AUTO)
+ seq_show_option(seq, "lookup_mode", "auto");
+
return 0;
}
@@ -2480,6 +2546,8 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
#endif
f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
+
+ F2FS_OPTION(sbi).lookup_mode = LOOKUP_PERF;
}
#ifdef CONFIG_QUOTA
@@ -2560,21 +2628,39 @@ out_unlock:
restore_flag:
sbi->gc_mode = gc_mode;
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
+ f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err);
return err;
}
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
- int retry = DEFAULT_RETRY_IO_COUNT;
+ unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16;
+ long long start, writeback, end;
+
+ f2fs_info(sbi, "f2fs_enable_checkpoint() starts, meta: %lld, node: %lld, data: %lld",
+ get_pages(sbi, F2FS_DIRTY_META),
+ get_pages(sbi, F2FS_DIRTY_NODES),
+ get_pages(sbi, F2FS_DIRTY_DATA));
+
+ f2fs_update_time(sbi, ENABLE_TIME);
+
+ start = ktime_get();
/* we should flush all the data to keep data consistency */
- do {
- sync_inodes_sb(sbi->sb);
+ while (get_pages(sbi, F2FS_DIRTY_DATA)) {
+ writeback_inodes_sb_nr(sbi->sb, nr_pages, WB_REASON_SYNC);
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
- } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
- if (unlikely(retry < 0))
- f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
+ if (f2fs_time_over(sbi, ENABLE_TIME))
+ break;
+ }
+ writeback = ktime_get();
+
+ sync_inodes_sb(sbi->sb);
+
+ if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA)))
+ f2fs_warn(sbi, "checkpoint=enable has some unwritten data: %lld",
+ get_pages(sbi, F2FS_DIRTY_DATA));
f2fs_down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
@@ -2587,6 +2673,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
/* Let's ensure there's no pending checkpoint anymore */
f2fs_flush_ckpt_thread(sbi);
+
+ end = ktime_get();
+
+ f2fs_info(sbi, "f2fs_enable_checkpoint() finishes, writeback:%llu, sync:%llu",
+ ktime_ms_delta(writeback, start),
+ ktime_ms_delta(end, writeback));
}
static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
@@ -3570,6 +3662,8 @@ static struct block_device **f2fs_get_devices(struct super_block *sb,
}
static const struct fscrypt_operations f2fs_cryptops = {
+ .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_crypt_info) -
+ (int)offsetof(struct f2fs_inode_info, vfs_inode),
.needs_bounce_pages = 1,
.has_32bit_inodes = 1,
.supports_subblock_data_units = 1,
@@ -3581,7 +3675,7 @@ static const struct fscrypt_operations f2fs_cryptops = {
.has_stable_inodes = f2fs_has_stable_inodes,
.get_devices = f2fs_get_devices,
};
-#endif
+#endif /* CONFIG_FS_ENCRYPTION */
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
@@ -4148,6 +4242,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->total_node_count = SEGS_TO_BLKS(sbi,
((le32_to_cpu(raw_super->segment_count_nat) / 2) *
NAT_ENTRY_PER_BLOCK));
+ sbi->allocate_section_hint = le32_to_cpu(raw_super->section_count);
+ sbi->allocate_section_policy = ALLOCATE_FORWARD_NOHINT;
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -4171,6 +4267,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+ sbi->interval_time[ENABLE_TIME] = DEF_ENABLE_INTERVAL;
sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -4629,9 +4726,11 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
+ sbi->bggc_io_aware = AWARE_ALL_IO;
#ifdef CONFIG_BLK_DEV_ZONED
sbi->max_open_zones = UINT_MAX;
sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
+ sbi->bggc_io_aware = AWARE_READ_IO;
#endif
for (i = 0; i < max_devices; i++) {
@@ -4659,6 +4758,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
SEGS_TO_BLKS(sbi,
FDEV(i).total_segments) - 1 +
le32_to_cpu(raw_super->segment0_blkaddr);
+ sbi->allocate_section_hint = FDEV(i).total_segments /
+ SEGS_PER_SEC(sbi);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f736052dea50..6d2a4fba68a2 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -281,6 +281,22 @@ static ssize_t encoding_flags_show(struct f2fs_attr *a,
le16_to_cpu(F2FS_RAW_SUPER(sbi)->s_encoding_flags));
}
+static ssize_t effective_lookup_mode_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ switch (F2FS_OPTION(sbi).lookup_mode) {
+ case LOOKUP_PERF:
+ return sysfs_emit(buf, "perf\n");
+ case LOOKUP_COMPAT:
+ return sysfs_emit(buf, "compat\n");
+ case LOOKUP_AUTO:
+ if (sb_no_casefold_compat_fallback(sbi->sb))
+ return sysfs_emit(buf, "auto:perf\n");
+ return sysfs_emit(buf, "auto:compat\n");
+ }
+ return 0;
+}
+
static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -866,6 +882,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "bggc_io_aware")) {
+ if (t < AWARE_ALL_IO || t > AWARE_NONE)
+ return -EINVAL;
+ sbi->bggc_io_aware = t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "allocate_section_hint")) {
+ if (t < 0 || t > MAIN_SECS(sbi))
+ return -EINVAL;
+ sbi->allocate_section_hint = t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "allocate_section_policy")) {
+ if (t < ALLOCATE_FORWARD_NOHINT || t > ALLOCATE_FORWARD_FROM_HINT)
+ return -EINVAL;
+ sbi->allocate_section_policy = t;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -1138,6 +1175,8 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+F2FS_SBI_GENERAL_RW_ATTR(allocate_section_hint);
+F2FS_SBI_GENERAL_RW_ATTR(allocate_section_policy);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1175,6 +1214,7 @@ F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
#endif
F2FS_SBI_GENERAL_RW_ATTR(carve_out);
F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
+F2FS_SBI_GENERAL_RW_ATTR(bggc_io_aware);
/* STAT_INFO ATTR */
#ifdef CONFIG_F2FS_STAT_FS
@@ -1211,6 +1251,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(encoding_flags);
+F2FS_GENERAL_RO_ATTR(effective_lookup_mode);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
F2FS_GENERAL_RO_ATTR(pending_discard);
@@ -1303,6 +1344,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
+ ATTR_LIST(bggc_io_aware),
#ifdef CONFIG_F2FS_IOSTAT
ATTR_LIST(iostat_enable),
ATTR_LIST(iostat_period_ms),
@@ -1329,6 +1371,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(current_reserved_blocks),
ATTR_LIST(encoding),
ATTR_LIST(encoding_flags),
+ ATTR_LIST(effective_lookup_mode),
ATTR_LIST(mounted_time_sec),
#ifdef CONFIG_F2FS_STAT_FS
ATTR_LIST(cp_foreground_calls),
@@ -1371,6 +1414,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(max_read_extent_count),
ATTR_LIST(carve_out),
ATTR_LIST(reserved_pin_section),
+ ATTR_LIST(allocate_section_hint),
+ ATTR_LIST(allocate_section_policy),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
@@ -1723,12 +1768,15 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
seq_printf(seq, " Main : 0x%010x (%10d)\n",
SM_I(sbi)->main_blkaddr,
le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
- seq_printf(seq, " # of Sections : %12d\n",
- le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
+ seq_printf(seq, " Block size : %12lu KB\n", F2FS_BLKSIZE >> 10);
+ seq_printf(seq, " Segment size : %12d MB\n",
+ (BLKS_PER_SEG(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
seq_printf(seq, " Segs/Sections : %12d\n",
SEGS_PER_SEC(sbi));
seq_printf(seq, " Section size : %12d MB\n",
- SEGS_PER_SEC(sbi) << 1);
+ (BLKS_PER_SEC(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
+ seq_printf(seq, " # of Sections : %12d\n",
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
if (!f2fs_is_multi_device(sbi))
return 0;
@@ -1742,6 +1790,69 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
return 0;
}
+static int __maybe_unused donation_list_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct inode *inode;
+ struct f2fs_inode_info *fi;
+ struct dentry *dentry;
+ char *buf, *path;
+ int i;
+
+ buf = f2fs_getname(sbi);
+ if (!buf)
+ return 0;
+
+ seq_printf(seq, "Donation List\n");
+ seq_printf(seq, " # of files : %u\n", sbi->donate_files);
+ seq_printf(seq, " %-50s %10s %20s %20s %22s\n",
+ "File path", "Status", "Donation offset (kb)",
+ "Donation size (kb)", "File cached size (kb)");
+ seq_printf(seq, "---\n");
+
+ for (i = 0; i < sbi->donate_files; i++) {
+ spin_lock(&sbi->inode_lock[DONATE_INODE]);
+ if (list_empty(&sbi->inode_list[DONATE_INODE])) {
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+ break;
+ }
+ fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
+ struct f2fs_inode_info, gdonate_list);
+ list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[DONATE_INODE]);
+
+ if (!inode)
+ continue;
+
+ inode_lock_shared(inode);
+
+ dentry = d_find_alias(inode);
+ if (!dentry) {
+ path = NULL;
+ } else {
+ path = dentry_path_raw(dentry, buf, PATH_MAX);
+ if (IS_ERR(path))
+ goto next;
+ }
+ seq_printf(seq, " %-50s %10s %20llu %20llu %22llu\n",
+ path ? path : "<unlinked>",
+ is_inode_flag_set(inode, FI_DONATE_FINISHED) ?
+ "Evicted" : "Donated",
+ (loff_t)fi->donate_start << (PAGE_SHIFT - 10),
+ (loff_t)(fi->donate_end + 1) << (PAGE_SHIFT - 10),
+ (loff_t)inode->i_mapping->nrpages << (PAGE_SHIFT - 10));
+next:
+ dput(dentry);
+ inode_unlock_shared(inode);
+ iput(inode);
+ }
+ f2fs_putname(buf);
+ return 0;
+}
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
static int __maybe_unused inject_stats_seq_show(struct seq_file *seq,
void *offset)
@@ -1851,6 +1962,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
discard_plist_seq_show, sb);
proc_create_single_data("disk_map", 0444, sbi->s_proc,
disk_map_seq_show, sb);
+ proc_create_single_data("donation_list", 0444, sbi->s_proc,
+ donation_list_seq_show, sb);
#ifdef CONFIG_F2FS_FAULT_INJECTION
proc_create_single_data("inject_stats", 0444, sbi->s_proc,
inject_stats_seq_show, sb);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 2287f238ae09..f0ab9a3c7a82 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -287,6 +287,8 @@ static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
}
const struct fsverity_operations f2fs_verityops = {
+ .inode_info_offs = (int)offsetof(struct f2fs_inode_info, i_verity_info) -
+ (int)offsetof(struct f2fs_inode_info, vfs_inode),
.begin_enable_verity = f2fs_begin_enable_verity,
.end_enable_verity = f2fs_end_enable_verity,
.get_verity_descriptor = f2fs_get_verity_descriptor,