summaryrefslogtreecommitdiff
path: root/fs/f2fs/segment.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-05-04 18:03:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-05-04 18:03:38 -0700
commitd0195c7d7af6a456c37f4b4b2df5528f10714482 (patch)
tree9b93122a6011814b63fd5a66d2a0a240388ad5e5 /fs/f2fs/segment.c
parente4adffb8daf476a01e7b4a55f586dc8c26e81392 (diff)
parent9557727876674893d35940fddbd03d3b505e7ed8 (diff)
Merge tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we added a new mount option, "checkpoint_merge", which introduces a kernel thread dealing with the f2fs checkpoints. Once we start to manage the IO priority along with blk-cgroup, the checkpoint operation can be processed in a lower priority under the process context. Since the checkpoint holds all the filesystem operations, we give a higher priority to the checkpoint thread all the time. Enhancements: - introduce gc_merge mount option to introduce a checkpoint thread - improve to run discard thread efficiently - allow modular compression algorithms - expose # of overprivision segments to sysfs - expose runtime compression stat to sysfs Bug fixes: - fix OOB memory access by the node id lookup - avoid touching checkpointed data in the checkpoint-disabled mode - fix the resizing flow to avoid kernel panic and race conditions - fix block allocation issues on pinned files - address some swapfile issues - fix hugtask problem and kernel panic during atomic write operations - don't start checkpoint thread in RO And, we've cleaned up some kernel coding style and build warnings. In addition, we fixed some minor race conditions and error handling routines" * tag 'f2fs-for-5.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (48 commits) f2fs: drop inplace IO if fs status is abnormal f2fs: compress: remove unneed check condition f2fs: clean up left deprecated IO trace codes f2fs: avoid using native allocate_segment_by_default() f2fs: remove unnecessary struct declaration f2fs: fix to avoid NULL pointer dereference f2fs: avoid duplicated codes for cleanup f2fs: document: add description about compressed space handling f2fs: clean up build warnings f2fs: fix the periodic wakeups of discard thread f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block() f2fs: fix to avoid GC/mmap race with f2fs_truncate() f2fs: set checkpoint_merge by default f2fs: Fix a hungtask problem in atomic write f2fs: fix to restrict mount condition on readonly block device f2fs: introduce gc_merge mount option f2fs: fix to cover __allocate_new_section() with curseg_lock f2fs: fix wrong alloc_type in f2fs_do_replace_block f2fs: delete empty compress.h f2fs: fix a typo in inode.c ...
Diffstat (limited to 'fs/f2fs/segment.c')
-rw-r--r--fs/f2fs/segment.c184
1 files changed, 133 insertions, 51 deletions
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c2866561263e..c605415840b5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -186,7 +186,10 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
struct inmem_pages *new;
- f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
+ if (PagePrivate(page))
+ set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+ else
+ f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -324,23 +327,27 @@ void f2fs_drop_inmem_pages(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- while (!list_empty(&fi->inmem_pages)) {
+ do {
mutex_lock(&fi->inmem_lock);
+ if (list_empty(&fi->inmem_pages)) {
+ fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ sbi->atomic_files--;
+ }
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ mutex_unlock(&fi->inmem_lock);
+ break;
+ }
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, true);
mutex_unlock(&fi->inmem_lock);
- }
-
- fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- sbi->atomic_files--;
- }
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ } while (1);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
@@ -503,8 +510,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
* dir/node pages without enough free segments.
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
- down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, NULL_SEGNO);
+ if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+ sbi->gc_thread->f2fs_gc_task) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ wake_up(&sbi->gc_thread->gc_wait_queue_head);
+ io_schedule();
+ finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ } else {
+ down_write(&sbi->gc_lock);
+ f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+ }
}
}
@@ -653,7 +671,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
llist_add(&cmd.llnode, &fcc->issue_list);
- /* update issue_list before we wake up issue_flush thread */
+ /*
+ * update issue_list before we wake up issue_flush thread, this
+ * smp_mb() pairs with another barrier in ___wait_event(), see
+ * more details in comments of waitqueue_active().
+ */
smp_mb();
if (waitqueue_active(&fcc->flush_wait_queue))
@@ -861,7 +883,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
- ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
ckpt_valid_blocks == usable_blocks)) {
@@ -946,7 +968,7 @@ static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (get_ckpt_valid_blocks(sbi, segno))
+ if (get_ckpt_valid_blocks(sbi, segno, false))
continue;
mutex_unlock(&dirty_i->seglist_lock);
return segno;
@@ -1095,6 +1117,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
int discard_type, unsigned int granularity)
{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
@@ -1114,7 +1138,9 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
- dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ if (atomic_read(&dcc->discard_cmd_cnt))
+ dpolicy->max_interval =
+ DEF_MIN_DISCARD_ISSUE_TIME;
}
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1730,8 +1756,15 @@ static int issue_discard_thread(void *data)
set_freezable();
do {
- __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
- dcc->discard_granularity);
+ if (sbi->gc_mode == GC_URGENT_HIGH ||
+ !f2fs_available_free_memory(sbi, DISCARD_CACHE))
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ else
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
+ dcc->discard_granularity);
+
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ wait_ms = dpolicy.max_interval;
wait_event_interruptible_timeout(*q,
kthread_should_stop() || freezing(current) ||
@@ -1755,9 +1788,8 @@ static int issue_discard_thread(void *data)
wait_ms = dpolicy.max_interval;
continue;
}
-
- if (sbi->gc_mode == GC_URGENT_HIGH)
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ continue;
sb_start_intwrite(sbi->sb);
@@ -1765,7 +1797,7 @@ static int issue_discard_thread(void *data)
if (issued > 0) {
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
- } else if (issued == -1){
+ } else if (issued == -1) {
wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
if (!wait_ms)
wait_ms = dpolicy.mid_interval;
@@ -2142,6 +2174,7 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
unsigned int segno, int modified)
{
struct seg_entry *se = get_seg_entry(sbi, segno);
+
se->type = type;
if (modified)
__mark_sit_entry_dirty(sbi, segno);
@@ -2333,6 +2366,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
void *addr = curseg->sum_blk;
+
addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
memcpy(addr, sum, sizeof(struct f2fs_summary));
}
@@ -2604,22 +2638,20 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->alloc_type = LFS;
}
-static void __next_free_blkoff(struct f2fs_sb_info *sbi,
- struct curseg_info *seg, block_t start)
+static int __next_free_blkoff(struct f2fs_sb_info *sbi,
+ int segno, block_t start)
{
- struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+ struct seg_entry *se = get_seg_entry(sbi, segno);
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
unsigned long *target_map = SIT_I(sbi)->tmp_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
- int i, pos;
+ int i;
for (i = 0; i < entries; i++)
target_map[i] = ckpt_map[i] | cur_map[i];
- pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
-
- seg->next_blkoff = pos;
+ return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
}
/*
@@ -2631,11 +2663,18 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg)
{
if (seg->alloc_type == SSR)
- __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
+ seg->next_blkoff =
+ __next_free_blkoff(sbi, seg->segno,
+ seg->next_blkoff + 1);
else
seg->next_blkoff++;
}
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
+{
+ return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
+}
+
/*
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
@@ -2661,7 +2700,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
reset_curseg(sbi, type, 1);
curseg->alloc_type = SSR;
- __next_free_blkoff(sbi, curseg, 0);
+ curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
if (IS_ERR(sum_page)) {
@@ -2893,7 +2932,8 @@ unlock:
up_read(&SM_I(sbi)->curseg_lock);
}
-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+ bool new_sec, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
@@ -2901,32 +2941,43 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
if (!curseg->inited)
goto alloc;
- if (!curseg->next_blkoff &&
- !get_valid_blocks(sbi, curseg->segno, false) &&
- !get_ckpt_valid_blocks(sbi, curseg->segno))
- return;
+ if (force || curseg->next_blkoff ||
+ get_valid_blocks(sbi, curseg->segno, new_sec))
+ goto alloc;
+ if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+ return;
alloc:
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
}
-void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_section(struct f2fs_sb_info *sbi,
+ int type, bool force)
+{
+ __allocate_new_segment(sbi, type, true, force);
+}
+
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
+ down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
- __allocate_new_segment(sbi, type);
+ __allocate_new_section(sbi, type, force);
up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
int i;
+ down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
- __allocate_new_segment(sbi, i);
+ __allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -3239,7 +3290,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
struct inode *inode = fio->page->mapping->host;
if (is_cold_data(fio->page)) {
- if (fio->sbi->am.atgc_enabled)
+ if (fio->sbi->am.atgc_enabled &&
+ (fio->io_type == FS_DATA_IO) &&
+ (fio->sbi->gc_mode != GC_URGENT_HIGH))
return CURSEG_ALL_DATA_ATGC;
else
return CURSEG_COLD_DATA;
@@ -3365,12 +3418,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
f2fs_inode_chksum_set(sbi, page);
}
- if (F2FS_IO_ALIGNED(sbi))
- fio->retry = false;
-
if (fio) {
struct f2fs_bio_info *io;
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
io = sbi->write_io[fio->type] + fio->temp;
@@ -3499,7 +3552,13 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
__func__, segno);
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto drop_bio;
+ }
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) {
+ err = -EIO;
+ goto drop_bio;
}
stat_inc_inplace_blocks(fio->sbi);
@@ -3514,6 +3573,15 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
}
return err;
+drop_bio:
+ if (fio->bio) {
+ struct bio *bio = *(fio->bio);
+
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ fio->bio = NULL;
+ }
+ return err;
}
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
@@ -3539,6 +3607,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct seg_entry *se;
int type;
unsigned short old_blkoff;
+ unsigned char old_alloc_type;
segno = GET_SEGNO(sbi, new_blkaddr);
se = get_seg_entry(sbi, segno);
@@ -3572,6 +3641,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
old_cursegno = curseg->segno;
old_blkoff = curseg->next_blkoff;
+ old_alloc_type = curseg->alloc_type;
/* change the current segment */
if (segno != curseg->segno) {
@@ -3606,6 +3676,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
+ curseg->alloc_type = old_alloc_type;
}
up_write(&sit_i->sentry_lock);
@@ -3717,6 +3788,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
for (j = 0; j < blk_off; j++) {
struct f2fs_summary *s;
+
s = (struct f2fs_summary *)(kaddr + offset);
seg_i->sum_blk->entries[j] = *s;
offset += SUMMARY_SIZE;
@@ -3779,6 +3851,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
if (__exist_node_summaries(sbi)) {
struct f2fs_summary *ns = &sum->entries[0];
int i;
+
for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
ns->version = 0;
ns->ofs_in_node = 0;
@@ -3880,6 +3953,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
+
seg_i = CURSEG_I(sbi, i);
if (sbi->ckpt->alloc_type[i] == SSR)
blkoff = sbi->blocks_per_seg;
@@ -3916,6 +3990,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
int i, end;
+
if (IS_DATASEG(type))
end = type + NR_CURSEG_DATA_TYPE;
else
@@ -4499,6 +4574,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
/* set use the current segments */
for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+
__set_test_and_inuse(sbi, curseg_t->segno);
}
}
@@ -4731,7 +4807,8 @@ static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
}
static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
memcpy(data, zone, sizeof(struct blk_zone));
return 0;
}
@@ -4783,7 +4860,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
f2fs_notice(sbi, "Assign new section to curseg[%d]: "
"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
- allocate_segment_by_default(sbi, type, true);
+
+ f2fs_allocate_new_section(sbi, type, true);
/* check consistency of the zone curseg pointed to */
if (check_zone_write_pointer(sbi, zbd, &zone))
@@ -4847,8 +4925,10 @@ struct check_zone_write_pointer_args {
};
static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
struct check_zone_write_pointer_args *args;
+
args = (struct check_zone_write_pointer_args *)data;
return check_zone_write_pointer(args->sbi, args->fdev, zone);
@@ -5127,6 +5207,7 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
kvfree(dirty_i->victim_secmap);
}
@@ -5171,6 +5252,7 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+
if (!free_i)
return;
SM_I(sbi)->free_info = NULL;