summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs24
-rw-r--r--Documentation/filesystems/f2fs.rst2
-rw-r--r--fs/f2fs/Kconfig10
-rw-r--r--fs/f2fs/acl.h2
-rw-r--r--fs/f2fs/checkpoint.c37
-rw-r--r--fs/f2fs/compress.c182
-rw-r--r--fs/f2fs/data.c163
-rw-r--r--fs/f2fs/dir.c374
-rw-r--r--fs/f2fs/f2fs.h171
-rw-r--r--fs/f2fs/file.c401
-rw-r--r--fs/f2fs/gc.c125
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/hash.c76
-rw-r--r--fs/f2fs/inline.c49
-rw-r--r--fs/f2fs/namei.c19
-rw-r--r--fs/f2fs/node.c101
-rw-r--r--fs/f2fs/node.h5
-rw-r--r--fs/f2fs/recovery.c51
-rw-r--r--fs/f2fs/segment.c40
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/super.c88
-rw-r--r--fs/f2fs/sysfs.c97
-rw-r--r--fs/f2fs/trace.h2
-rw-r--r--fs/f2fs/xattr.h8
-rw-r--r--include/trace/events/f2fs.h83
25 files changed, 1580 insertions, 534 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index bd8a0d19abe6..4bb93a06d8ab 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -323,3 +323,27 @@ What: /sys/fs/f2fs/<disk>/mounted_time_sec
Date: February 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Show the mounted time in secs of this partition.
+
+What: /sys/fs/f2fs/<disk>/data_io_flag
+Date: April 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Give a way to attach REQ_META|FUA to data writes
+ given temperature-based bits. Now the bits indicate:
+ * REQ_META | REQ_FUA |
+ * 5 | 4 | 3 | 2 | 1 | 0 |
+ * Cold | Warm | Hot | Cold | Warm | Hot |
+
+What: /sys/fs/f2fs/<disk>/node_io_flag
+Date: June 2020
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Give a way to attach REQ_META|FUA to node writes
+ given temperature-based bits. Now the bits indicate:
+ * REQ_META | REQ_FUA |
+ * 5 | 4 | 3 | 2 | 1 | 0 |
+ * Cold | Warm | Hot | Cold | Warm | Hot |
+
+What: /sys/fs/f2fs/<disk>/iostat_period_ms
+Date: April 2020
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Give a way to change iostat_period time. 3secs by default.
+ The new iostat trace gives stats gap given the period.
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 4218ac658629..099d45ac8d8f 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -248,7 +248,7 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl
would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
This space is reclaimed once checkpoint=enable.
compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo",
- "lz4" and "zstd" algorithm.
+ "lz4", "zstd" and "lzo-rle" algorithm.
compress_log_size=%u Support configuring compress cluster size, the size will
be 4KB * (1 << %u), 16KB is minimum size, also it's
default size.
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index bb68d21e1f8c..d13c5c6a9787 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -127,3 +127,13 @@ config F2FS_FS_ZSTD
default y
help
Support ZSTD compress algorithm, if unsure, say Y.
+
+config F2FS_FS_LZORLE
+ bool "LZO-RLE compression support"
+ depends on F2FS_FS_COMPRESSION
+ depends on F2FS_FS_LZO
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ default y
+ help
+ Support LZO-RLE compress algorithm, if unsure, say Y.
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index b96823c59b15..124868c13f80 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/acl.h
*
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 852890b72d6a..236064930251 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -86,6 +86,8 @@ repeat:
return ERR_PTR(err);
}
+ f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
+
lock_page(page);
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
@@ -220,6 +222,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.is_por = (type == META_POR),
};
struct blk_plug plug;
+ int err;
if (unlikely(type == META_POR))
fio.op_flags &= ~REQ_META;
@@ -263,8 +266,11 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
- f2fs_submit_page_bio(&fio);
- f2fs_put_page(page, 0);
+ err = f2fs_submit_page_bio(&fio);
+ f2fs_put_page(page, err ? 1 : 0);
+
+ if (!err)
+ f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
}
out:
blk_finish_plug(&plug);
@@ -889,8 +895,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
int i;
int err;
- sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
- GFP_KERNEL);
+ sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks),
+ GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -1160,10 +1166,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
- struct blk_plug plug;
int err = 0, cnt = 0;
- blk_start_plug(&plug);
+ /*
+ * Let's flush inline_data in dirty node pages.
+ */
+ f2fs_flush_inline_data(sbi);
retry_flush_quotas:
f2fs_lock_all(sbi);
@@ -1192,7 +1200,7 @@ retry_flush_dents:
f2fs_unlock_all(sbi);
err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
if (err)
- goto out;
+ return err;
cond_resched();
goto retry_flush_quotas;
}
@@ -1208,7 +1216,7 @@ retry_flush_dents:
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
- goto out;
+ return err;
cond_resched();
goto retry_flush_quotas;
}
@@ -1224,7 +1232,7 @@ retry_flush_nodes:
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
- goto out;
+ return err;
}
cond_resched();
goto retry_flush_nodes;
@@ -1236,8 +1244,6 @@ retry_flush_nodes:
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
-out:
- blk_finish_plug(&plug);
return err;
}
@@ -1260,6 +1266,9 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
if (unlikely(f2fs_cp_error(sbi)))
break;
+ if (type == F2FS_DIRTY_META)
+ f2fs_sync_meta_pages(sbi, META, LONG_MAX,
+ FS_CP_META_IO);
io_schedule_timeout(DEFAULT_IO_TIMEOUT);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1553,7 +1562,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return 0;
f2fs_warn(sbi, "Start checkpoint disabled!");
}
- mutex_lock(&sbi->cp_mutex);
+ if (cpc->reason != CP_RESIZE)
+ mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1622,7 +1632,8 @@ stop:
f2fs_update_time(sbi, CP_TIME);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
- mutex_unlock(&sbi->cp_mutex);
+ if (cpc->reason != CP_RESIZE)
+ mutex_unlock(&sbi->cp_mutex);
return err;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index df7b2d15eacd..1e02a8c106b0 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -65,15 +65,6 @@ static void f2fs_set_compressed_page(struct page *page,
page->mapping = inode->i_mapping;
}
-static void f2fs_put_compressed_page(struct page *page)
-{
- set_page_private(page, (unsigned long)NULL);
- ClearPagePrivate(page);
- page->mapping = NULL;
- unlock_page(page);
- put_page(page);
-}
-
static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock)
{
int i;
@@ -98,8 +89,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
f2fs_drop_rpages(cc, len, true);
}
-static void f2fs_put_rpages_mapping(struct compress_ctx *cc,
- struct address_space *mapping,
+static void f2fs_put_rpages_mapping(struct address_space *mapping,
pgoff_t start, int len)
{
int i;
@@ -236,7 +226,12 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
if (!cc->private)
return -ENOMEM;
- cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size);
+ /*
+ * we do not change cc->clen to LZ4_compressBound(inputsize) to
+ * adapt worst compress case, because lz4 compressor can handle
+ * output budget properly.
+ */
+ cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE;
return 0;
}
@@ -252,11 +247,9 @@ static int lz4_compress_pages(struct compress_ctx *cc)
len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen,
cc->clen, cc->private);
- if (!len) {
- printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n",
- KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id);
- return -EIO;
- }
+ if (!len)
+ return -EAGAIN;
+
cc->clen = len;
return 0;
}
@@ -366,6 +359,13 @@ static int zstd_compress_pages(struct compress_ctx *cc)
return -EIO;
}
+ /*
+ * there is compressed data remained in intermediate buffer due to
+ * no more space in cbuf.cdata
+ */
+ if (ret)
+ return -EAGAIN;
+
cc->clen = outbuf.pos;
return 0;
}
@@ -451,6 +451,31 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = {
};
#endif
+#ifdef CONFIG_F2FS_FS_LZO
+#ifdef CONFIG_F2FS_FS_LZORLE
+static int lzorle_compress_pages(struct compress_ctx *cc)
+{
+ int ret;
+
+ ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
+ &cc->clen, cc->private);
+ if (ret != LZO_E_OK) {
+ printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n",
+ KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
+ return -EIO;
+ }
+ return 0;
+}
+
+static const struct f2fs_compress_ops f2fs_lzorle_ops = {
+ .init_compress_ctx = lzo_init_compress_ctx,
+ .destroy_compress_ctx = lzo_destroy_compress_ctx,
+ .compress_pages = lzorle_compress_pages,
+ .decompress_pages = lzo_decompress_pages,
+};
+#endif
+#endif
+
static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = {
#ifdef CONFIG_F2FS_FS_LZO
&f2fs_lzo_ops,
@@ -467,6 +492,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = {
#else
NULL,
#endif
+#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE)
+ &f2fs_lzorle_ops,
+#else
+ NULL,
+#endif
};
bool f2fs_is_compress_backend_ready(struct inode *inode)
@@ -476,17 +506,47 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
}
-static struct page *f2fs_grab_page(void)
+static mempool_t *compress_page_pool = NULL;
+static int num_compress_pages = 512;
+module_param(num_compress_pages, uint, 0444);
+MODULE_PARM_DESC(num_compress_pages,
+ "Number of intermediate compress pages to preallocate");
+
+int f2fs_init_compress_mempool(void)
+{
+ compress_page_pool = mempool_create_page_pool(num_compress_pages, 0);
+ if (!compress_page_pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void f2fs_destroy_compress_mempool(void)
+{
+ mempool_destroy(compress_page_pool);
+}
+
+static struct page *f2fs_compress_alloc_page(void)
{
struct page *page;
- page = alloc_page(GFP_NOFS);
- if (!page)
- return NULL;
+ page = mempool_alloc(compress_page_pool, GFP_NOFS);
lock_page(page);
+
return page;
}
+static void f2fs_compress_free_page(struct page *page)
+{
+ if (!page)
+ return;
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ page->mapping = NULL;
+ unlock_page(page);
+ mempool_free(page, compress_page_pool);
+}
+
static int f2fs_compress_pages(struct compress_ctx *cc)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
@@ -516,7 +576,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
for (i = 0; i < cc->nr_cpages; i++) {
- cc->cpages[i] = f2fs_grab_page();
+ cc->cpages[i] = f2fs_compress_alloc_page();
if (!cc->cpages[i]) {
ret = -ENOMEM;
goto out_free_cpages;
@@ -561,7 +621,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
vunmap(cc->rbuf);
for (i = nr_cpages; i < cc->nr_cpages; i++) {
- f2fs_put_compressed_page(cc->cpages[i]);
+ f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -581,7 +641,7 @@ out_vunmap_rbuf:
out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
- f2fs_put_compressed_page(cc->cpages[i]);
+ f2fs_compress_free_page(cc->cpages[i]);
}
kfree(cc->cpages);
cc->cpages = NULL;
@@ -788,6 +848,8 @@ static bool cluster_may_compress(struct compress_ctx *cc)
return false;
if (!f2fs_cluster_is_full(cc))
return false;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode))))
+ return false;
return __cluster_may_compress(cc);
}
@@ -879,7 +941,7 @@ retry:
if (!PageUptodate(page)) {
f2fs_unlock_rpages(cc, i + 1);
- f2fs_put_rpages_mapping(cc, mapping, start_idx,
+ f2fs_put_rpages_mapping(mapping, start_idx,
cc->cluster_size);
f2fs_destroy_compress_ctx(cc);
goto retry;
@@ -914,7 +976,7 @@ retry:
unlock_pages:
f2fs_unlock_rpages(cc, i);
release_pages:
- f2fs_put_rpages_mapping(cc, mapping, start_idx, i);
+ f2fs_put_rpages_mapping(mapping, start_idx, i);
f2fs_destroy_compress_ctx(cc);
return ret;
}
@@ -954,6 +1016,55 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
return first_index;
}
+int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
+{
+ void *fsdata = NULL;
+ struct page *pagep;
+ int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
+ pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
+ log_cluster_size;
+ int err;
+
+ err = f2fs_is_compressed_cluster(inode, start_idx);
+ if (err < 0)
+ return err;
+
+ /* truncate normal cluster */
+ if (!err)
+ return f2fs_do_truncate_blocks(inode, from, lock);
+
+ /* truncate compressed cluster */
+ err = f2fs_prepare_compress_overwrite(inode, &pagep,
+ start_idx, &fsdata);
+
+ /* should not be a normal cluster */
+ f2fs_bug_on(F2FS_I_SB(inode), err == 0);
+
+ if (err <= 0)
+ return err;
+
+ if (err > 0) {
+ struct page **rpages = fsdata;
+ int cluster_size = F2FS_I(inode)->i_cluster_size;
+ int i;
+
+ for (i = cluster_size - 1; i >= 0; i--) {
+ loff_t start = rpages[i]->index << PAGE_SHIFT;
+
+ if (from <= start) {
+ zero_user_segment(rpages[i], 0, PAGE_SIZE);
+ } else {
+ zero_user_segment(rpages[i], from - start,
+ PAGE_SIZE);
+ break;
+ }
+ }
+
+ f2fs_compress_write_end(inode, fsdata, start_idx, true);
+ }
+ return 0;
+}
+
static int f2fs_write_compressed_pages(struct compress_ctx *cc,
int *submitted,
struct writeback_control *wbc,
@@ -985,7 +1096,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
loff_t psize;
int i, err;
- if (!f2fs_trylock_op(sbi))
+ if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi))
return -EAGAIN;
set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
@@ -1092,7 +1203,8 @@ unlock_continue:
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ if (!IS_NOQUOTA(inode))
+ f2fs_unlock_op(sbi);
spin_lock(&fi->i_size_lock);
if (fi->last_disk_size < psize)
@@ -1118,7 +1230,8 @@ out_put_cic:
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
- f2fs_unlock_op(sbi);
+ if (!IS_NOQUOTA(inode))
+ f2fs_unlock_op(sbi);
return -EAGAIN;
}
@@ -1132,7 +1245,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
if (unlikely(bio->bi_status))
mapping_set_error(cic->inode->i_mapping, -EIO);
- f2fs_put_compressed_page(page);
+ f2fs_compress_free_page(page);
dec_page_count(sbi, F2FS_WB_DATA);
@@ -1293,7 +1406,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page;
- page = f2fs_grab_page();
+ page = f2fs_compress_alloc_page();
if (!page)
goto out_free;
@@ -1313,7 +1426,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
continue;
}
- dic->tpages[i] = f2fs_grab_page();
+ dic->tpages[i] = f2fs_compress_alloc_page();
if (!dic->tpages[i])
goto out_free;
}
@@ -1335,8 +1448,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
if (!dic->tpages[i])
continue;
- unlock_page(dic->tpages[i]);
- put_page(dic->tpages[i]);
+ f2fs_compress_free_page(dic->tpages[i]);
}
kfree(dic->tpages);
}
@@ -1345,7 +1457,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
for (i = 0; i < dic->nr_cpages; i++) {
if (!dic->cpages[i])
continue;
- f2fs_put_compressed_page(dic->cpages[i]);
+ f2fs_compress_free_page(dic->cpages[i]);
}
kfree(dic->cpages);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 072fb19555a8..326c63879ddc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -115,7 +115,8 @@ static enum count_type __read_io_type(struct page *page)
/* postprocessing steps for read bios */
enum bio_post_read_step {
STEP_DECRYPT,
- STEP_DECOMPRESS,
+ STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
+ STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
STEP_VERITY,
};
@@ -514,6 +515,34 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi,
__submit_bio(sbi, bio, type);
}
+static void __attach_io_flag(struct f2fs_io_info *fio)
+{
+ struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
+ unsigned int io_flag, fua_flag, meta_flag;
+
+ if (fio->type == DATA)
+ io_flag = sbi->data_io_flag;
+ else if (fio->type == NODE)
+ io_flag = sbi->node_io_flag;
+ else
+ return;
+
+ fua_flag = io_flag & temp_mask;
+ meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+
+ /*
+ * data/node io flag bits per temp:
+ * REQ_META | REQ_FUA |
+ * 5 | 4 | 3 | 2 | 1 | 0 |
+ * Cold | Warm | Hot | Cold | Warm | Hot |
+ */
+ if ((1 << fio->temp) & meta_flag)
+ fio->op_flags |= REQ_META;
+ if ((1 << fio->temp) & fua_flag)
+ fio->op_flags |= REQ_FUA;
+}
+
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
@@ -521,6 +550,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
+ __attach_io_flag(fio);
bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
if (is_read_io(fio->op))
@@ -662,6 +692,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc && !is_read_io(fio->op))
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+ __attach_io_flag(fio);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
@@ -848,6 +879,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_PAGES);
+ __attach_io_flag(fio);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
add_bio_entry(fio->sbi, bio, page, fio->temp);
@@ -968,7 +1000,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_compressed_file(inode))
- post_read_steps |= 1 << STEP_DECOMPRESS;
+ post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
if (f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
@@ -1011,6 +1043,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
}
ClearPageError(page);
inc_page_count(sbi, F2FS_RD_DATA);
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
__submit_bio(sbi, bio, DATA);
return 0;
}
@@ -1809,6 +1842,25 @@ static int f2fs_xattr_fiemap(struct inode *inode,
return (err < 0 ? err : 0);
}
+static loff_t max_inode_blocks(struct inode *inode)
+{
+ loff_t result = ADDRS_PER_INODE(inode);
+ loff_t leaf_count = ADDRS_PER_BLOCK(inode);
+
+ /* two direct node blocks */
+ result += (leaf_count * 2);
+
+ /* two indirect node blocks */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += (leaf_count * 2);
+
+ /* one double indirect node block */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += leaf_count;
+
+ return result;
+}
+
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -1818,6 +1870,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
+ bool compr_cluster = false;
+ unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
ret = f2fs_precache_extents(inode);
@@ -1852,6 +1906,9 @@ next:
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
+ if (compr_cluster)
+ map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
+
ret = get_data_block(inode, start_blk, &map_bh, 0,
F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
if (ret)
@@ -1862,7 +1919,7 @@ next:
start_blk = next_pgofs;
if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
- F2FS_I_SB(inode)->max_file_blocks))
+ max_inode_blocks(inode)))
goto prep_next;
flags |= FIEMAP_EXTENT_LAST;
@@ -1874,11 +1931,38 @@ next:
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
+ if (ret)
+ goto out;
+ size = 0;
}
- if (start_blk > last_blk || ret)
+ if (start_blk > last_blk)
goto out;
+ if (compr_cluster) {
+ compr_cluster = false;
+
+
+ logical = blk_to_logical(inode, start_blk - 1);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = blk_to_logical(inode, cluster_size);
+
+ flags |= FIEMAP_EXTENT_ENCODED;
+
+ start_blk += cluster_size - 1;
+
+ if (start_blk > last_blk)
+ goto out;
+
+ goto prep_next;
+ }
+
+ if (map_bh.b_blocknr == COMPRESS_ADDR) {
+ compr_cluster = true;
+ start_blk++;
+ goto prep_next;
+ }
+
logical = blk_to_logical(inode, start_blk);
phys = blk_to_logical(inode, map_bh.b_blocknr);
size = map_bh.b_size;
@@ -2016,6 +2100,7 @@ submit_and_realloc:
goto submit_and_realloc;
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
@@ -2114,6 +2199,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
+ struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
@@ -2131,16 +2217,16 @@ submit_and_realloc:
page->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
- bio = NULL;
dic->failed = true;
if (refcount_sub_and_test(dic->nr_cpages - i,
- &dic->ref))
+ &dic->ref)) {
f2fs_decompress_end_io(dic->rpages,
cc->cluster_size, true,
false);
- f2fs_free_dic(dic);
+ f2fs_free_dic(dic);
+ }
f2fs_put_dnode(&dn);
- *bio_ret = bio;
+ *bio_ret = NULL;
return ret;
}
}
@@ -2150,7 +2236,14 @@ submit_and_realloc:
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
+ /* tag STEP_DECOMPRESS to handle IO in wq */
+ ctx = bio->bi_private;
+ if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
+ ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+
inc_page_count(sbi, F2FS_RD_DATA);
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = blkaddr;
}
@@ -2624,8 +2717,8 @@ write:
f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
- /* Dentry blocks are controlled by checkpoint */
- if (S_ISDIR(inode->i_mode)) {
+ /* Dentry/quota blocks are controlled by checkpoint */
+ if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
goto done;
@@ -2767,7 +2860,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- int cycled;
int range_whole = 0;
xa_mark_t tag;
int nwritten = 0;
@@ -2785,17 +2877,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
- if (index == 0)
- cycled = 1;
- else
- cycled = 0;
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
- cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
@@ -2960,12 +3047,13 @@ next:
}
}
#endif
- if ((!cycled && !done) || retry) {
- cycled = 1;
+ if (retry) {
index = 0;
- end = writeback_index - 1;
+ end = -1;
goto retry;
}
+ if (wbc->range_cyclic && !done)
+ done_index = 0;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
@@ -3494,6 +3582,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
+ } else {
+ if (err > 0)
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
}
out:
@@ -3577,6 +3668,37 @@ static int f2fs_set_data_page_dirty(struct page *page)
return 0;
}
+
+static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct dnode_of_data dn;
+ sector_t start_idx, blknr = 0;
+ int ret;
+
+ start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
+ if (ret)
+ return 0;
+
+ if (dn.data_blkaddr != COMPRESS_ADDR) {
+ dn.ofs_in_node += block - start_idx;
+ blknr = f2fs_data_blkaddr(&dn);
+ if (!__is_valid_data_blkaddr(blknr))
+ blknr = 0;
+ }
+
+ f2fs_put_dnode(&dn);
+
+ return blknr;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
@@ -3588,6 +3710,9 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
filemap_write_and_wait(mapping);
+ if (f2fs_compressed_file(inode))
+ return f2fs_bmap_compress(inode, block);
+
return generic_block_bmap(mapping, block, get_data_block_bmap);
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 44bfc464df78..d35976785e8c 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -70,6 +70,111 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de)
return DT_UNKNOWN;
}
+/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
+int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+
+ if (IS_CASEFOLDED(dir)) {
+ fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
+ GFP_NOFS);
+ if (!fname->cf_name.name)
+ return -ENOMEM;
+ fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+ fname->usr_fname,
+ fname->cf_name.name,
+ F2FS_NAME_LEN);
+ if ((int)fname->cf_name.len <= 0) {
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+ if (f2fs_has_strict_mode(sbi))
+ return -EINVAL;
+ /* fall back to treating name as opaque byte sequence */
+ }
+ }
+#endif
+ return 0;
+}
+
+static int __f2fs_setup_filename(const struct inode *dir,
+ const struct fscrypt_name *crypt_name,
+ struct f2fs_filename *fname)
+{
+ int err;
+
+ memset(fname, 0, sizeof(*fname));
+
+ fname->usr_fname = crypt_name->usr_fname;
+ fname->disk_name = crypt_name->disk_name;
+#ifdef CONFIG_FS_ENCRYPTION
+ fname->crypto_buf = crypt_name->crypto_buf;
+#endif
+ if (crypt_name->is_ciphertext_name) {
+ /* hash was decoded from the no-key name */
+ fname->hash = cpu_to_le32(crypt_name->hash);
+ } else {
+ err = f2fs_init_casefolded_name(dir, fname);
+ if (err) {
+ f2fs_free_filename(fname);
+ return err;
+ }
+ f2fs_hash_filename(dir, fname);
+ }
+ return 0;
+}
+
+/*
+ * Prepare to search for @iname in @dir. This is similar to
+ * fscrypt_setup_filename(), but this also handles computing the casefolded name
+ * and the f2fs dirhash if needed, then packing all the information about this
+ * filename up into a 'struct f2fs_filename'.
+ */
+int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct f2fs_filename *fname)
+{
+ struct fscrypt_name crypt_name;
+ int err;
+
+ err = fscrypt_setup_filename(dir, iname, lookup, &crypt_name);
+ if (err)
+ return err;
+
+ return __f2fs_setup_filename(dir, &crypt_name, fname);
+}
+
+/*
+ * Prepare to look up @dentry in @dir. This is similar to
+ * fscrypt_prepare_lookup(), but this also handles computing the casefolded name
+ * and the f2fs dirhash if needed, then packing all the information about this
+ * filename up into a 'struct f2fs_filename'.
+ */
+int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
+ struct f2fs_filename *fname)
+{
+ struct fscrypt_name crypt_name;
+ int err;
+
+ err = fscrypt_prepare_lookup(dir, dentry, &crypt_name);
+ if (err)
+ return err;
+
+ return __f2fs_setup_filename(dir, &crypt_name, fname);
+}
+
+void f2fs_free_filename(struct f2fs_filename *fname)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+ kfree(fname->crypto_buf.name);
+ fname->crypto_buf.name = NULL;
+#endif
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+}
+
static unsigned long dir_block_index(unsigned int level,
int dir_level, unsigned int idx)
{
@@ -84,8 +189,7 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
- struct fscrypt_name *fname,
- f2fs_hash_t namehash,
+ const struct f2fs_filename *fname,
int *max_slots,
struct page **res_page)
{
@@ -96,7 +200,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- de = f2fs_find_target_dentry(fname, namehash, max_slots, &d);
+ de = f2fs_find_target_dentry(&d, fname, max_slots);
if (de)
*res_page = dentry_page;
@@ -107,112 +211,57 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for.
- *
- * Returns: 0 if the directory entry matches, more than 0 if it
- * doesn't match or less than zero on error.
*/
-int f2fs_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry, bool quick)
+static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
+ const u8 *de_name, u32 de_name_len)
{
- const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb);
+ const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
const struct unicode_map *um = sbi->s_encoding;
- int ret;
-
- if (quick)
- ret = utf8_strncasecmp_folded(um, name, entry);
- else
- ret = utf8_strncasecmp(um, name, entry);
+ struct qstr entry = QSTR_INIT(de_name, de_name_len);
+ int res;
- if (ret < 0) {
- /* Handle invalid character sequence as either an error
- * or as an opaque byte sequence.
+ res = utf8_strncasecmp_folded(um, name, &entry);
+ if (res < 0) {
+ /*
+ * In strict mode, ignore invalid names. In non-strict mode,
+ * fall back to treating them as opaque byte sequences.
*/
- if (f2fs_has_strict_mode(sbi))
- return -EINVAL;
-
- if (name->len != entry->len)
- return 1;
-
- return !!memcmp(name->name, entry->name, name->len);
+ if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+ return false;
+ return !memcmp(name->name, entry.name, name->len);
}
-
- return ret;
+ return res == 0;
}
+#endif /* CONFIG_UNICODE */
-static void f2fs_fname_setup_ci_filename(struct inode *dir,
- const struct qstr *iname,
- struct fscrypt_str *cf_name)
+static inline bool f2fs_match_name(const struct inode *dir,
+ const struct f2fs_filename *fname,
+ const u8 *de_name, u32 de_name_len)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-
- if (!IS_CASEFOLDED(dir)) {
- cf_name->name = NULL;
- return;
- }
+ struct fscrypt_name f;
- cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS);
- if (!cf_name->name)
- return;
-
- cf_name->len = utf8_casefold(sbi->s_encoding,
- iname, cf_name->name,
- F2FS_NAME_LEN);
- if ((int)cf_name->len <= 0) {
- kvfree(cf_name->name);
- cf_name->name = NULL;
- }
-}
-#endif
-
-static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
- struct f2fs_dir_entry *de,
- struct fscrypt_name *fname,
- struct fscrypt_str *cf_str,
- unsigned long bit_pos,
- f2fs_hash_t namehash)
-{
#ifdef CONFIG_UNICODE
- struct inode *parent = d->inode;
- struct f2fs_sb_info *sbi = F2FS_I_SB(parent);
- struct qstr entry;
-#endif
-
- if (de->hash_code != namehash)
- return false;
+ if (fname->cf_name.name) {
+ struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
-#ifdef CONFIG_UNICODE
- entry.name = d->filename[bit_pos];
- entry.len = de->name_len;
-
- if (sbi->s_encoding && IS_CASEFOLDED(parent)) {
- if (cf_str->name) {
- struct qstr cf = {.name = cf_str->name,
- .len = cf_str->len};
- return !f2fs_ci_compare(parent, &cf, &entry, true);
- }
- return !f2fs_ci_compare(parent, fname->usr_fname, &entry,
- false);
+ return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
}
#endif
- if (fscrypt_match_name(fname, d->filename[bit_pos],
- le16_to_cpu(de->name_len)))
- return true;
- return false;
+ f.usr_fname = fname->usr_fname;
+ f.disk_name = fname->disk_name;
+#ifdef CONFIG_FS_ENCRYPTION
+ f.crypto_buf = fname->crypto_buf;
+#endif
+ return fscrypt_match_name(&f, de_name, de_name_len);
}
-struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
- f2fs_hash_t namehash, int *max_slots,
- struct f2fs_dentry_ptr *d)
+struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
+ const struct f2fs_filename *fname, int *max_slots)
{
struct f2fs_dir_entry *de;
- struct fscrypt_str cf_str = { .name = NULL, .len = 0 };
unsigned long bit_pos = 0;
int max_len = 0;
-#ifdef CONFIG_UNICODE
- f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str);
-#endif
-
if (max_slots)
*max_slots = 0;
while (bit_pos < d->max) {
@@ -229,7 +278,9 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
continue;
}
- if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash))
+ if (de->hash_code == fname->hash &&
+ f2fs_match_name(d->inode, fname, d->filename[bit_pos],
+ le16_to_cpu(de->name_len)))
goto found;
if (max_slots && max_len > *max_slots)
@@ -243,33 +294,27 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
found:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
-
-#ifdef CONFIG_UNICODE
- kvfree(cf_str.name);
-#endif
return de;
}
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
- struct fscrypt_name *fname,
+ const struct f2fs_filename *fname,
struct page **res_page)
{
- struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
- int s = GET_DENTRY_SLOTS(name.len);
+ int s = GET_DENTRY_SLOTS(fname->disk_name.len);
unsigned int nbucket, nblock;
unsigned int bidx, end_block;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
- f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- le32_to_cpu(namehash) % nbucket);
+ le32_to_cpu(fname->hash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
@@ -285,8 +330,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, namehash,
- &max_slots, res_page);
+ de = find_in_block(dir, dentry_page, fname, &max_slots,
+ res_page);
if (de)
break;
@@ -295,8 +340,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
- if (!de && room && F2FS_I(dir)->chash != namehash) {
- F2FS_I(dir)->chash = namehash;
+ if (!de && room && F2FS_I(dir)->chash != fname->hash) {
+ F2FS_I(dir)->chash = fname->hash;
F2FS_I(dir)->clevel = level;
}
@@ -304,7 +349,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page)
+ const struct f2fs_filename *fname,
+ struct page **res_page)
{
unsigned long npages = dir_blocks(dir);
struct f2fs_dir_entry *de = NULL;
@@ -353,18 +399,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
const struct qstr *child, struct page **res_page)
{
struct f2fs_dir_entry *de = NULL;
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
int err;
-#ifdef CONFIG_UNICODE
- if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) &&
- utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) {
- *res_page = ERR_PTR(-EINVAL);
- return NULL;
- }
-#endif
-
- err = fscrypt_setup_filename(dir, child, 1, &fname);
+ err = f2fs_setup_filename(dir, child, 1, &fname);
if (err) {
if (err == -ENOENT)
*res_page = NULL;
@@ -375,7 +413,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
de = __f2fs_find_entry(dir, &fname, res_page);
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
return de;
}
@@ -416,7 +454,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
f2fs_put_page(page, 1);
}
-static void init_dent_inode(const struct qstr *name, struct page *ipage)
+static void init_dent_inode(const struct f2fs_filename *fname,
+ struct page *ipage)
{
struct f2fs_inode *ri;
@@ -424,16 +463,16 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
- ri->i_namelen = cpu_to_le32(name->len);
- memcpy(ri->i_name, name->name, name->len);
+ ri->i_namelen = cpu_to_le32(fname->disk_name.len);
+ memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
set_page_dirty(ipage);
}
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
- struct qstr dot = QSTR_INIT(".", 1);
- struct qstr dotdot = QSTR_INIT("..", 2);
+ struct fscrypt_str dot = FSTR_INIT(".", 1);
+ struct fscrypt_str dotdot = FSTR_INIT("..", 2);
/* update dirent of "." */
f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0);
@@ -467,8 +506,7 @@ static int make_empty_dir(struct inode *inode,
}
struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
- const struct qstr *new_name, const struct qstr *orig_name,
- struct page *dpage)
+ const struct f2fs_filename *fname, struct page *dpage)
{
struct page *page;
int err;
@@ -493,7 +531,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
if (err)
goto put_error;
- err = f2fs_init_security(inode, dir, orig_name, page);
+ err = f2fs_init_security(inode, dir,
+ fname ? fname->usr_fname : NULL, page);
if (err)
goto put_error;
@@ -508,8 +547,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
return page;
}
- if (new_name) {
- init_dent_inode(new_name, page);
+ if (fname) {
+ init_dent_inode(fname, page);
if (IS_ENCRYPTED(dir))
file_set_enc_name(inode);
}
@@ -577,11 +616,11 @@ next:
}
bool f2fs_has_enough_room(struct inode *dir, struct page *ipage,
- struct fscrypt_name *fname)
+ const struct f2fs_filename *fname)
{
struct f2fs_dentry_ptr d;
unsigned int bit_pos;
- int slots = GET_DENTRY_SLOTS(fname_len(fname));
+ int slots = GET_DENTRY_SLOTS(fname->disk_name.len);
make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage));
@@ -591,8 +630,8 @@ bool f2fs_has_enough_room(struct inode *dir, struct page *ipage,
}
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
- const struct qstr *name, f2fs_hash_t name_hash,
- unsigned int bit_pos)
+ const struct fscrypt_str *name, f2fs_hash_t name_hash,
+ unsigned int bit_pos)
{
struct f2fs_dir_entry *de;
int slots = GET_DENTRY_SLOTS(name->len);
@@ -612,15 +651,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
}
}
-int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
unsigned int level;
unsigned int current_depth;
unsigned long bidx, block;
- f2fs_hash_t dentry_hash;
unsigned int nbucket, nblock;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
@@ -629,11 +666,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
int slots, err = 0;
level = 0;
- slots = GET_DENTRY_SLOTS(new_name->len);
- dentry_hash = f2fs_dentry_hash(dir, new_name, NULL);
+ slots = GET_DENTRY_SLOTS(fname->disk_name.len);
current_depth = F2FS_I(dir)->i_current_depth;
- if (F2FS_I(dir)->chash == dentry_hash) {
+ if (F2FS_I(dir)->chash == fname->hash) {
level = F2FS_I(dir)->clevel;
F2FS_I(dir)->chash = 0;
}
@@ -655,7 +691,7 @@ start:
nblock = bucket_blocks(level);
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- (le32_to_cpu(dentry_hash) % nbucket));
+ (le32_to_cpu(fname->hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = f2fs_get_new_data_page(dir, NULL, block, true);
@@ -679,8 +715,7 @@ add_dentry:
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, new_name,
- orig_name, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -688,7 +723,8 @@ add_dentry:
}
make_dentry_ptr_block(NULL, &d, dentry_blk);
- f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash,
+ bit_pos);
set_page_dirty(dentry_page);
@@ -712,21 +748,15 @@ fail:
return err;
}
-int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
- struct qstr new_name;
int err = -EAGAIN;
- new_name.name = fname_name(fname);
- new_name.len = fname_len(fname);
-
if (f2fs_has_inline_dentry(dir))
- err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname,
- inode, ino, mode);
+ err = f2fs_add_inline_entry(dir, fname, inode, ino, mode);
if (err == -EAGAIN)
- err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname,
- inode, ino, mode);
+ err = f2fs_add_regular_entry(dir, fname, inode, ino, mode);
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
@@ -739,12 +769,12 @@ int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
struct page *page = NULL;
struct f2fs_dir_entry *de = NULL;
int err;
- err = fscrypt_setup_filename(dir, name, 0, &fname);
+ err = f2fs_setup_filename(dir, name, 0, &fname);
if (err)
return err;
@@ -767,7 +797,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
} else {
err = f2fs_add_dentry(dir, &fname, inode, ino, mode);
}
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
return err;
}
@@ -777,7 +807,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -1080,17 +1110,41 @@ const struct file_operations f2fs_dir_operations = {
static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
- struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *inode = READ_ONCE(parent->d_inode);
+ const struct inode *dir = READ_ONCE(parent->d_inode);
+ const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ struct qstr entry = QSTR_INIT(str, len);
+ char strbuf[DNAME_INLINE_LEN];
+ int res;
+
+ if (!dir || !IS_CASEFOLDED(dir))
+ goto fallback;
- if (!inode || !IS_CASEFOLDED(inode)) {
- if (len != name->len)
- return -1;
- return memcmp(str, name->name, len);
+ /*
+ * If the dentry name is stored in-line, then it may be concurrently
+ * modified by a rename. If this happens, the VFS will eventually retry
+ * the lookup, so it doesn't matter what ->d_compare() returns.
+ * However, it's unsafe to call utf8_strncasecmp() with an unstable
+ * string. Therefore, we have to copy the name into a temporary buffer.
+ */
+ if (len <= DNAME_INLINE_LEN - 1) {
+ memcpy(strbuf, str, len);
+ strbuf[len] = 0;
+ entry.name = strbuf;
+ /* prevent compiler from optimizing out the temporary buffer */
+ barrier();
}
- return f2fs_ci_compare(inode, name, &qstr, false);
+ res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
+ if (res >= 0)
+ return res;
+
+ if (f2fs_has_strict_mode(sbi))
+ return -EINVAL;
+fallback:
+ if (len != name->len)
+ return 1;
+ return !!memcmp(str, name->name, len);
}
static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5c0149d2f46a..b35a50f4953c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/f2fs.h
*
@@ -139,6 +139,7 @@ struct f2fs_mount_info {
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
struct fscrypt_dummy_context dummy_enc_ctx; /* test dummy encryption */
+ block_t unusable_cap_perc; /* percentage for cap */
block_t unusable_cap; /* Amount of space allowed to be
* unusable when disabling checkpoint
*/
@@ -194,6 +195,7 @@ enum {
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
#define CP_PAUSE 0x00000040
+#define CP_RESIZE 0x00000080
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
@@ -428,6 +430,10 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64)
+#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \
+ _IOR(F2FS_IOCTL_MAGIC, 18, __u64)
+#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \
+ _IOR(F2FS_IOCTL_MAGIC, 19, __u64)
#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL
#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL
@@ -506,6 +512,42 @@ static inline int get_inline_xattr_addrs(struct inode *inode);
* For INODE and NODE manager
*/
/* for directory operations */
+
+struct f2fs_filename {
+ /*
+ * The filename the user specified. This is NULL for some
+ * filesystem-internal operations, e.g. converting an inline directory
+ * to a non-inline one, or roll-forward recovering an encrypted dentry.
+ */
+ const struct qstr *usr_fname;
+
+ /*
+ * The on-disk filename. For encrypted directories, this is encrypted.
+ * This may be NULL for lookups in an encrypted dir without the key.
+ */
+ struct fscrypt_str disk_name;
+
+ /* The dirhash of this filename */
+ f2fs_hash_t hash;
+
+#ifdef CONFIG_FS_ENCRYPTION
+ /*
+ * For lookups in encrypted directories: either the buffer backing
+ * disk_name, or a buffer that holds the decoded no-key name.
+ */
+ struct fscrypt_str crypto_buf;
+#endif
+#ifdef CONFIG_UNICODE
+ /*
+ * For casefolded directories: the casefolded name, but it's left NULL
+ * if the original name is not valid Unicode or if the filesystem is
+ * doing an internal operation where usr_fname is also NULL. In these
+ * cases we fall back to treating the name as an opaque byte sequence.
+ */
+ struct fscrypt_str cf_name;
+#endif
+};
+
struct f2fs_dentry_ptr {
struct inode *inode;
void *bitmap;
@@ -1088,8 +1130,9 @@ enum cp_reason_type {
};
enum iostat_type {
- APP_DIRECT_IO, /* app direct IOs */
- APP_BUFFERED_IO, /* app buffered IOs */
+ /* WRITE IO */
+ APP_DIRECT_IO, /* app direct write IOs */
+ APP_BUFFERED_IO, /* app buffered write IOs */
APP_WRITE_IO, /* app write IOs */
APP_MAPPED_IO, /* app mapped IOs */
FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */
@@ -1100,6 +1143,19 @@ enum iostat_type {
FS_CP_DATA_IO, /* data IOs from checkpoint */
FS_CP_NODE_IO, /* node IOs from checkpoint */
FS_CP_META_IO, /* meta IOs from checkpoint */
+
+ /* READ IO */
+ APP_DIRECT_READ_IO, /* app direct read IOs */
+ APP_BUFFERED_READ_IO, /* app buffered read IOs */
+ APP_READ_IO, /* app read IOs */
+ APP_MAPPED_READ_IO, /* app mapped read IOs */
+ FS_DATA_READ_IO, /* data read IOs */
+ FS_GDATA_READ_IO, /* data read IOs from background gc */
+ FS_CDATA_READ_IO, /* compressed data read IOs */
+ FS_NODE_READ_IO, /* node read IOs */
+ FS_META_READ_IO, /* meta read IOs */
+
+ /* other */
FS_DISCARD, /* discard */
NR_IO_TYPE,
};
@@ -1269,6 +1325,7 @@ enum compress_algorithm_type {
COMPRESS_LZO,
COMPRESS_LZ4,
COMPRESS_ZSTD,
+ COMPRESS_LZORLE,
COMPRESS_MAX,
};
@@ -1418,7 +1475,6 @@ struct f2fs_sb_info {
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
- struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
@@ -1504,8 +1560,15 @@ struct f2fs_sb_info {
/* For app/fs IO statistics */
spinlock_t iostat_lock;
- unsigned long long write_iostat[NR_IO_TYPE];
+ unsigned long long rw_iostat[NR_IO_TYPE];
+ unsigned long long prev_rw_iostat[NR_IO_TYPE];
bool iostat_enable;
+ unsigned long iostat_next_period;
+ unsigned int iostat_period_ms;
+
+ /* to attach REQ_META|REQ_FUA flags */
+ unsigned int data_io_flag;
+ unsigned int node_io_flag;
/* For sysfs suppport */
struct kobject s_kobj;
@@ -2902,12 +2965,12 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
return is_set_ckpt_flags(sbi, CP_ERROR_FLAG);
}
-static inline bool is_dot_dotdot(const struct qstr *str)
+static inline bool is_dot_dotdot(const u8 *name, size_t len)
{
- if (str->len == 1 && str->name[0] == '.')
+ if (len == 1 && name[0] == '.')
return true;
- if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+ if (len == 2 && name[0] == '.' && name[1] == '.')
return true;
return false;
@@ -2935,18 +2998,12 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
- void *ret;
-
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(sbi, FAULT_KMALLOC);
return NULL;
}
- ret = kmalloc(size, flags);
- if (ret)
- return ret;
-
- return kvmalloc(size, flags);
+ return kmalloc(size, flags);
}
static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
@@ -2996,29 +3053,45 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
+#define DEFAULT_IOSTAT_PERIOD_MS 3000
+#define MIN_IOSTAT_PERIOD_MS 100
+/* maximum period of iostat tracing is 1 day */
+#define MAX_IOSTAT_PERIOD_MS 8640000
+
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
int i;
spin_lock(&sbi->iostat_lock);
- for (i = 0; i < NR_IO_TYPE; i++)
- sbi->write_iostat[i] = 0;
+ for (i = 0; i < NR_IO_TYPE; i++) {
+ sbi->rw_iostat[i] = 0;
+ sbi->prev_rw_iostat[i] = 0;
+ }
spin_unlock(&sbi->iostat_lock);
}
+extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
+
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes)
{
if (!sbi->iostat_enable)
return;
spin_lock(&sbi->iostat_lock);
- sbi->write_iostat[type] += io_bytes;
+ sbi->rw_iostat[type] += io_bytes;
if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->write_iostat[APP_BUFFERED_IO] =
- sbi->write_iostat[APP_WRITE_IO] -
- sbi->write_iostat[APP_DIRECT_IO];
+ sbi->rw_iostat[APP_BUFFERED_IO] =
+ sbi->rw_iostat[APP_WRITE_IO] -
+ sbi->rw_iostat[APP_DIRECT_IO];
+
+ if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
+ sbi->rw_iostat[APP_BUFFERED_READ_IO] =
+ sbi->rw_iostat[APP_READ_IO] -
+ sbi->rw_iostat[APP_DIRECT_READ_IO];
spin_unlock(&sbi->iostat_lock);
+
+ f2fs_record_iostat(sbi);
}
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
@@ -3064,6 +3137,7 @@ static inline void f2fs_clear_page_private(struct page *page)
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
+int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(const struct path *path, struct kstat *stat,
@@ -3099,31 +3173,32 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
-extern int f2fs_ci_compare(const struct inode *parent,
- const struct qstr *name,
- const struct qstr *entry,
- bool quick);
-
/*
* dir.c
*/
unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de);
-struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname,
- f2fs_hash_t namehash, int *max_slots,
- struct f2fs_dentry_ptr *d);
+int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname);
+int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct f2fs_filename *fname);
+int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
+ struct f2fs_filename *fname);
+void f2fs_free_filename(struct f2fs_filename *fname);
+struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
+ const struct f2fs_filename *fname, int *max_slots);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d);
struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
- const struct qstr *new_name,
- const struct qstr *orig_name, struct page *dpage);
+ const struct f2fs_filename *fname, struct page *dpage);
void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth);
int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots);
void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page);
+ const struct f2fs_filename *fname,
+ struct page **res_page);
struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
const struct qstr *child, struct page **res_page);
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
@@ -3132,14 +3207,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode);
bool f2fs_has_enough_room(struct inode *dir, struct page *ipage,
- struct fscrypt_name *fname);
+ const struct f2fs_filename *fname);
void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
- const struct qstr *name, f2fs_hash_t name_hash,
+ const struct fscrypt_str *name, f2fs_hash_t name_hash,
unsigned int bit_pos);
-int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
+int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
-int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname,
+int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode);
@@ -3169,8 +3243,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
- const struct qstr *name_info, struct fscrypt_name *fname);
+void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
/*
* node.c
@@ -3202,6 +3275,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
int f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id);
@@ -3645,7 +3719,7 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
static inline void __init f2fs_create_root_stats(void) { }
static inline void f2fs_destroy_root_stats(void) { }
-static inline void update_sit_info(struct f2fs_sb_info *sbi) {}
+static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {}
#endif
extern const struct file_operations f2fs_dir_operations;
@@ -3678,11 +3752,11 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry);
int f2fs_write_inline_data(struct inode *inode, struct page *page);
bool f2fs_recover_inline_data(struct inode *inode, struct page *npage);
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page);
+ const struct f2fs_filename *fname,
+ struct page **res_page);
int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
-int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
+int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode);
void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry,
struct page *page, struct inode *dir,
@@ -3781,8 +3855,11 @@ int f2fs_prepare_compress_overwrite(struct inode *inode,
struct page **pagep, pgoff_t index, void **fsdata);
bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
pgoff_t index, unsigned copied);
+int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
+int f2fs_init_compress_mempool(void);
+void f2fs_destroy_compress_mempool(void);
void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
@@ -3816,6 +3893,8 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
}
+static inline int f2fs_init_compress_mempool(void) { return 0; }
+static inline void f2fs_destroy_compress_mempool(void) { }
#endif
static inline void set_compress_context(struct inode *inode)
@@ -3962,6 +4041,10 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode,
{
int diff = F2FS_I(inode)->i_cluster_size - blocks;
+ /* don't update i_compr_blocks if saved blocks were released */
+ if (!add && !F2FS_I(inode)->i_compr_blocks)
+ return;
+
if (add) {
F2FS_I(inode)->i_compr_blocks += diff;
stat_add_compr_blocks(inode, diff);
@@ -4003,8 +4086,6 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
return true;
if (f2fs_is_multi_device(sbi))
return true;
- if (f2fs_compressed_file(inode))
- return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so
* all IOs can be serialized by log-structured write.
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6ab8f621a3c5..3268f8dd59bb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -40,6 +40,10 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
ret = filemap_fault(vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ if (!ret)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
+ F2FS_BLKSIZE);
+
trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
return ret;
@@ -165,9 +169,11 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
{
struct dentry *dentry;
- inode = igrab(inode);
- dentry = d_find_any_alias(inode);
- iput(inode);
+ /*
+ * Make sure to get the non-deleted alias. The alias associated with
+ * the open file descriptor being fsync()'ed may be deleted already.
+ */
+ dentry = d_find_alias(inode);
if (!dentry)
return 0;
@@ -557,6 +563,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
bool compressed_cluster = false;
int cluster_index = 0, valid_blocks = 0;
int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ bool released = !F2FS_I(dn->inode)->i_compr_blocks;
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
base = get_extra_isize(dn->inode);
@@ -595,7 +602,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
f2fs_invalidate_blocks(sbi, blkaddr);
- nr_free++;
+
+ if (!released || blkaddr != COMPRESS_ADDR)
+ nr_free++;
}
if (compressed_cluster)
@@ -643,9 +652,6 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
return 0;
}
- if (f2fs_compressed_file(inode))
- return 0;
-
page = f2fs_get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
@@ -661,7 +667,7 @@ truncate_out:
return 0;
}
-static int do_truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
@@ -729,23 +735,28 @@ free_partial:
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
u64 free_from = from;
+ int err;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
* for compressed file, only support cluster size
* aligned truncation.
*/
- if (f2fs_compressed_file(inode)) {
- size_t cluster_shift = PAGE_SHIFT +
- F2FS_I(inode)->i_log_cluster_size;
- size_t cluster_mask = (1 << cluster_shift) - 1;
+ if (f2fs_compressed_file(inode))
+ free_from = round_up(from,
+ F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
+#endif
- free_from = from >> cluster_shift;
- if (from & cluster_mask)
- free_from++;
- free_from <<= cluster_shift;
- }
+ err = f2fs_do_truncate_blocks(inode, free_from, lock);
+ if (err)
+ return err;
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (from != free_from)
+ err = f2fs_truncate_partial_cluster(inode, from, lock);
+#endif
- return do_truncate_blocks(inode, free_from, lock);
+ return err;
}
int f2fs_truncate(struct inode *inode)
@@ -968,9 +979,7 @@ const struct inode_operations f2fs_file_inode_operations = {
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
.fiemap = f2fs_fiemap,
};
@@ -1649,7 +1658,11 @@ next_alloc:
down_write(&sbi->pin_sem);
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
+
+ f2fs_lock_op(sbi);
f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA);
+ f2fs_unlock_op(sbi);
+
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
up_write(&sbi->pin_sem);
@@ -2219,8 +2232,15 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (in != F2FS_GOING_DOWN_FULLSYNC) {
ret = mnt_want_write_file(filp);
- if (ret)
+ if (ret) {
+ if (ret == -EROFS) {
+ ret = 0;
+ f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+ trace_f2fs_shutdown(sbi, in, ret);
+ }
return ret;
+ }
}
switch (in) {
@@ -3301,7 +3321,6 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
__u64 block_count;
- int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -3313,9 +3332,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
sizeof(block_count)))
return -EFAULT;
- ret = f2fs_resize_fs(sbi, block_count);
-
- return ret;
+ return f2fs_resize_fs(sbi, block_count);
}
static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
@@ -3419,6 +3436,326 @@ static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
return put_user(blocks, (u64 __user *)arg);
}
+static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ unsigned int released_blocks = 0;
+ int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ block_t blkaddr;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ continue;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
+ }
+
+ while (count) {
+ int compr_blocks = 0;
+
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ blkaddr = f2fs_data_blkaddr(dn);
+
+ if (i == 0) {
+ if (blkaddr == COMPRESS_ADDR)
+ continue;
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+
+ if (__is_valid_data_blkaddr(blkaddr))
+ compr_blocks++;
+
+ if (blkaddr != NEW_ADDR)
+ continue;
+
+ dn->data_blkaddr = NULL_ADDR;
+ f2fs_set_data_blkaddr(dn);
+ }
+
+ f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
+ dec_valid_block_count(sbi, dn->inode,
+ cluster_size - compr_blocks);
+
+ released_blocks += cluster_size - compr_blocks;
+next:
+ count -= cluster_size;
+ }
+
+ return released_blocks;
+}
+
+static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t page_idx = 0, last_idx;
+ unsigned int released_blocks = 0;
+ int ret;
+ int writecount;
+
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+
+ if (!f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+ inode_lock(inode);
+
+ writecount = atomic_read(&inode->i_writecount);
+ if ((filp->f_mode & FMODE_WRITE && writecount != 1) || writecount) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (IS_IMMUTABLE(inode)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+ if (ret)
+ goto out;
+
+ if (!F2FS_I(inode)->i_compr_blocks)
+ goto out;
+
+ F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL;
+ f2fs_set_inode_flags(inode);
+ inode->i_ctime = current_time(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+ while (page_idx < last_idx) {
+ struct dnode_of_data dn;
+ pgoff_t end_offset, count;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT) {
+ page_idx = f2fs_get_next_page_offset(&dn,
+ page_idx);
+ ret = 0;
+ continue;
+ }
+ break;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
+ count = round_up(count, F2FS_I(inode)->i_cluster_size);
+
+ ret = release_compress_blocks(&dn, count);
+
+ f2fs_put_dnode(&dn);
+
+ if (ret < 0)
+ break;
+
+ page_idx += count;
+ released_blocks += ret;
+ }
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+out:
+ inode_unlock(inode);
+
+ mnt_drop_write_file(filp);
+
+ if (ret >= 0) {
+ ret = put_user(released_blocks, (u64 __user *)arg);
+ } else if (released_blocks && F2FS_I(inode)->i_compr_blocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+ "iblocks=%llu, released=%u, compr_blocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, inode->i_blocks,
+ released_blocks,
+ F2FS_I(inode)->i_compr_blocks);
+ }
+
+ return ret;
+}
+
+static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ unsigned int reserved_blocks = 0;
+ int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ block_t blkaddr;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
+
+ if (!__is_valid_data_blkaddr(blkaddr))
+ continue;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)))
+ return -EFSCORRUPTED;
+ }
+
+ while (count) {
+ int compr_blocks = 0;
+ blkcnt_t reserved;
+ int ret;
+
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ blkaddr = f2fs_data_blkaddr(dn);
+
+ if (i == 0) {
+ if (blkaddr == COMPRESS_ADDR)
+ continue;
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ compr_blocks++;
+ continue;
+ }
+
+ dn->data_blkaddr = NEW_ADDR;
+ f2fs_set_data_blkaddr(dn);
+ }
+
+ reserved = cluster_size - compr_blocks;
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved);
+ if (ret)
+ return ret;
+
+ if (reserved != cluster_size - compr_blocks)
+ return -ENOSPC;
+
+ f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
+
+ reserved_blocks += reserved;
+next:
+ count -= cluster_size;
+ }
+
+ return reserved_blocks;
+}
+
+static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t page_idx = 0, last_idx;
+ unsigned int reserved_blocks = 0;
+ int ret;
+
+ if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ return -EOPNOTSUPP;
+
+ if (!f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (F2FS_I(inode)->i_compr_blocks)
+ goto out;
+
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+ inode_lock(inode);
+
+ if (!IS_IMMUTABLE(inode)) {
+ ret = -EINVAL;
+ goto unlock_inode;
+ }
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+ while (page_idx < last_idx) {
+ struct dnode_of_data dn;
+ pgoff_t end_offset, count;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
+ if (ret) {
+ if (ret == -ENOENT) {
+ page_idx = f2fs_get_next_page_offset(&dn,
+ page_idx);
+ ret = 0;
+ continue;
+ }
+ break;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
+ count = round_up(count, F2FS_I(inode)->i_cluster_size);
+
+ ret = reserve_compress_blocks(&dn, count);
+
+ f2fs_put_dnode(&dn);
+
+ if (ret < 0)
+ break;
+
+ page_idx += count;
+ reserved_blocks += ret;
+ }
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+
+ if (ret >= 0) {
+ F2FS_I(inode)->i_flags &= ~F2FS_IMMUTABLE_FL;
+ f2fs_set_inode_flags(inode);
+ inode->i_ctime = current_time(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+ }
+unlock_inode:
+ inode_unlock(inode);
+out:
+ mnt_drop_write_file(filp);
+
+ if (ret >= 0) {
+ ret = put_user(reserved_blocks, (u64 __user *)arg);
+ } else if (reserved_blocks && F2FS_I(inode)->i_compr_blocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+ "iblocks=%llu, reserved=%u, compr_blocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, inode->i_blocks,
+ reserved_blocks,
+ F2FS_I(inode)->i_compr_blocks);
+ }
+
+ return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3501,6 +3838,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_set_volume_name(filp, arg);
case F2FS_IOC_GET_COMPRESS_BLOCKS:
return f2fs_get_compress_blocks(filp, arg);
+ case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
+ return f2fs_release_compress_blocks(filp, arg);
+ case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+ return f2fs_reserve_compress_blocks(filp, arg);
default:
return -ENOTTY;
}
@@ -3510,11 +3851,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ int ret;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- return generic_file_read_iter(iocb, iter);
+ ret = generic_file_read_iter(iocb, iter);
+
+ if (ret > 0)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+
+ return ret;
}
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -3662,6 +4009,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_VOLUME_NAME:
case F2FS_IOC_SET_VOLUME_NAME:
case F2FS_IOC_GET_COMPRESS_BLOCKS:
+ case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
+ case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 26248c8936db..5b95d5a146eb 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
+#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
@@ -737,6 +738,10 @@ got_it:
goto put_encrypted_page;
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
+
+ f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, 1);
@@ -840,6 +845,10 @@ static int move_data_block(struct inode *inode, block_t bidx,
f2fs_put_page(mpage, 1);
goto up_out;
}
+
+ f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
+
lock_page(mpage);
if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
!PageUptodate(mpage))) {
@@ -1399,12 +1408,29 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
-static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
- unsigned int end)
+static int free_segment_range(struct f2fs_sb_info *sbi,
+ unsigned int secs, bool gc_only)
{
- int type;
- unsigned int segno, next_inuse;
+ unsigned int segno, next_inuse, start, end;
+ struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
+ int gc_mode, gc_type;
int err = 0;
+ int type;
+
+ /* Force block allocation for GC */
+ MAIN_SECS(sbi) -= secs;
+ start = MAIN_SECS(sbi) * sbi->segs_per_sec;
+ end = MAIN_SEGS(sbi) - 1;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >= start)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >= start)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
@@ -1417,18 +1443,24 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- down_write(&sbi->gc_lock);
do_garbage_collect(sbi, segno, &gc_list, FG_GC);
- up_write(&sbi->gc_lock);
put_gc_inode(&gc_list);
- if (get_valid_blocks(sbi, segno, true))
- return -EAGAIN;
+ if (!gc_only && get_valid_blocks(sbi, segno, true)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
}
+ if (gc_only)
+ goto out;
- err = f2fs_sync_fs(sbi->sb, 1);
+ err = f2fs_write_checkpoint(sbi, &cpc);
if (err)
- return err;
+ goto out;
next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
if (next_inuse <= end) {
@@ -1436,6 +1468,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
next_inuse);
f2fs_bug_on(sbi, 1);
}
+out:
+ MAIN_SECS(sbi) += secs;
return err;
}
@@ -1481,6 +1515,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ MAIN_SECS(sbi) += secs;
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
@@ -1502,8 +1537,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
{
__u64 old_block_count, shrunk_blocks;
+ struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
unsigned int secs;
- int gc_mode, gc_type;
int err = 0;
__u32 rem;
@@ -1538,10 +1573,27 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
return -EINVAL;
}
- freeze_bdev(sbi->sb->s_bdev);
-
shrunk_blocks = old_block_count - block_count;
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+
+ /* stop other GC */
+ if (!down_write_trylock(&sbi->gc_lock))
+ return -EAGAIN;
+
+ /* stop CP to protect MAIN_SEC in free_segment_range */
+ f2fs_lock_op(sbi);
+ err = free_segment_range(sbi, secs, true);
+ f2fs_unlock_op(sbi);
+ up_write(&sbi->gc_lock);
+ if (err)
+ return err;
+
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ freeze_super(sbi->sb);
+ down_write(&sbi->gc_lock);
+ mutex_lock(&sbi->cp_mutex);
+
spin_lock(&sbi->stat_lock);
if (shrunk_blocks + valid_user_blocks(sbi) +
sbi->current_reserved_blocks + sbi->unusable_block_count +
@@ -1550,69 +1602,44 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
else
sbi->user_block_count -= shrunk_blocks;
spin_unlock(&sbi->stat_lock);
- if (err) {
- thaw_bdev(sbi->sb->s_bdev, sbi->sb);
- return err;
- }
-
- mutex_lock(&sbi->resize_mutex);
- set_sbi_flag(sbi, SBI_IS_RESIZEFS);
-
- mutex_lock(&DIRTY_I(sbi)->seglist_lock);
-
- MAIN_SECS(sbi) -= secs;
-
- for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
- if (SIT_I(sbi)->last_victim[gc_mode] >=
- MAIN_SECS(sbi) * sbi->segs_per_sec)
- SIT_I(sbi)->last_victim[gc_mode] = 0;
-
- for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
- if (sbi->next_victim_seg[gc_type] >=
- MAIN_SECS(sbi) * sbi->segs_per_sec)
- sbi->next_victim_seg[gc_type] = NULL_SEGNO;
-
- mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+ if (err)
+ goto out_err;
- err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
- MAIN_SEGS(sbi) - 1);
+ err = free_segment_range(sbi, secs, false);
if (err)
- goto out;
+ goto recover_out;
update_sb_metadata(sbi, -secs);
err = f2fs_commit_super(sbi, false);
if (err) {
update_sb_metadata(sbi, secs);
- goto out;
+ goto recover_out;
}
- mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
set_sbi_flag(sbi, SBI_IS_DIRTY);
- mutex_unlock(&sbi->cp_mutex);
- err = f2fs_sync_fs(sbi->sb, 1);
+ err = f2fs_write_checkpoint(sbi, &cpc);
if (err) {
- mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, secs);
- mutex_unlock(&sbi->cp_mutex);
update_sb_metadata(sbi, secs);
f2fs_commit_super(sbi, false);
}
-out:
+recover_out:
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
- MAIN_SECS(sbi) += secs;
spin_lock(&sbi->stat_lock);
sbi->user_block_count += shrunk_blocks;
spin_unlock(&sbi->stat_lock);
}
+out_err:
+ mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->gc_lock);
+ thaw_super(sbi->sb);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
- mutex_unlock(&sbi->resize_mutex);
- thaw_bdev(sbi->sb->s_bdev, sbi->sb);
return err;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index bbac9d3787bd..db3c61046aa4 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/gc.h
*
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 8c4ea5003ef8..de841aaf3c43 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -67,22 +67,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
-static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
- struct fscrypt_name *fname)
+static u32 TEA_hash_name(const u8 *p, size_t len)
{
- __u32 hash;
- f2fs_hash_t f2fs_hash;
- const unsigned char *p;
__u32 in[8], buf[4];
- const unsigned char *name = name_info->name;
- size_t len = name_info->len;
-
- /* encrypted bigname case */
- if (fname && !fname->disk_name.name)
- return cpu_to_le32(fname->hash);
-
- if (is_dot_dotdot(name_info))
- return 0;
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
@@ -90,7 +77,6 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
buf[2] = 0x98badcfe;
buf[3] = 0x10325476;
- p = name;
while (1) {
str2hashbuf(p, len, in, 4);
TEA_transform(buf, in);
@@ -99,41 +85,43 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
break;
len -= 16;
}
- hash = buf[0];
- f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
- return f2fs_hash;
+ return buf[0] & ~F2FS_HASH_COL_BIT;
}
-f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
- const struct qstr *name_info, struct fscrypt_name *fname)
+/*
+ * Compute @fname->hash. For all directories, @fname->disk_name must be set.
+ * For casefolded directories, @fname->usr_fname must be set, and also
+ * @fname->cf_name if the filename is valid Unicode.
+ */
+void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
{
-#ifdef CONFIG_UNICODE
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
- int r, dlen;
- unsigned char *buff;
- struct qstr folded;
+ const u8 *name = fname->disk_name.name;
+ size_t len = fname->disk_name.len;
- if (!name_info->len || !IS_CASEFOLDED(dir))
- goto opaque_seq;
+ WARN_ON_ONCE(!name);
- buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
- if (dlen < 0) {
- kvfree(buff);
- goto opaque_seq;
+ if (is_dot_dotdot(name, len)) {
+ fname->hash = 0;
+ return;
}
- folded.name = buff;
- folded.len = dlen;
- r = __f2fs_dentry_hash(&folded, fname);
-
- kvfree(buff);
- return r;
-opaque_seq:
+#ifdef CONFIG_UNICODE
+ if (IS_CASEFOLDED(dir)) {
+ /*
+ * If the casefolded name is provided, hash it instead of the
+ * on-disk name. If the casefolded name is *not* provided, that
+ * should only be because the name wasn't valid Unicode, so fall
+ * back to treating the name as an opaque byte sequence.
+ */
+ WARN_ON_ONCE(!fname->usr_fname->name);
+ if (fname->cf_name.name) {
+ name = fname->cf_name.name;
+ len = fname->cf_name.len;
+ } else {
+ name = fname->usr_fname->name;
+ len = fname->usr_fname->len;
+ }
+ }
#endif
- return __f2fs_dentry_hash(name_info, fname);
+ fname->hash = cpu_to_le32(TEA_hash_name(name, len));
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 9686ffea177e..dbade310dc79 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -306,15 +306,14 @@ process_inline:
}
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
- struct fscrypt_name *fname, struct page **res_page)
+ const struct f2fs_filename *fname,
+ struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
void *inline_dentry;
- f2fs_hash_t namehash;
ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
@@ -322,12 +321,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
return NULL;
}
- namehash = f2fs_dentry_hash(dir, &name, fname);
-
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = f2fs_find_target_dentry(fname, namehash, NULL, &d);
+ de = f2fs_find_target_dentry(&d, fname, NULL);
unlock_page(ipage);
if (de)
*res_page = ipage;
@@ -444,7 +441,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
- struct qstr new_name;
+ struct f2fs_filename fname;
nid_t ino;
umode_t fake_mode;
@@ -460,14 +457,19 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
continue;
}
- new_name.name = d.filename[bit_pos];
- new_name.len = le16_to_cpu(de->name_len);
+ /*
+ * We only need the disk_name and hash to move the dentry.
+ * We don't need the original or casefolded filenames.
+ */
+ memset(&fname, 0, sizeof(fname));
+ fname.disk_name.name = d.filename[bit_pos];
+ fname.disk_name.len = le16_to_cpu(de->name_len);
+ fname.hash = de->hash_code;
ino = le32_to_cpu(de->ino);
fake_mode = f2fs_get_de_type(de) << S_SHIFT;
- err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
- ino, fake_mode);
+ err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode);
if (err)
goto punch_dentry_pages;
@@ -544,7 +546,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
void *inline_dentry = NULL;
int err = 0;
@@ -553,19 +555,19 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
f2fs_lock_op(sbi);
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
+ err = f2fs_setup_filename(dir, &dentry->d_name, 0, &fname);
if (err)
goto out;
ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
- goto out;
+ goto out_fname;
}
if (f2fs_has_enough_room(dir, ipage, &fname)) {
f2fs_put_page(ipage, 1);
- goto out;
+ goto out_fname;
}
inline_dentry = inline_data_addr(dir, ipage);
@@ -573,22 +575,22 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
err = do_convert_inline_dir(dir, ipage, inline_dentry);
if (!err)
f2fs_put_page(ipage, 1);
+out_fname:
+ f2fs_free_filename(&fname);
out:
f2fs_unlock_op(sbi);
return err;
}
-int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
- const struct qstr *orig_name,
- struct inode *inode, nid_t ino, umode_t mode)
+int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
+ struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
- f2fs_hash_t name_hash;
void *inline_dentry = NULL;
struct f2fs_dentry_ptr d;
- int slots = GET_DENTRY_SLOTS(new_name->len);
+ int slots = GET_DENTRY_SLOTS(fname->disk_name.len);
struct page *page = NULL;
int err = 0;
@@ -610,8 +612,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, new_name,
- orig_name, ipage);
+ page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -620,8 +621,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
- name_hash = f2fs_dentry_hash(dir, new_name, NULL);
- f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
+ f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash,
+ bit_pos);
set_page_dirty(ipage);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index f54119da2217..e94e02c6580a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -482,7 +482,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
trace_f2fs_lookup_start(dir, dentry, flags);
@@ -491,19 +491,20 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- err = fscrypt_prepare_lookup(dir, dentry, &fname);
+ err = f2fs_prepare_lookup(dir, dentry, &fname);
if (err == -ENOENT)
goto out_splice;
if (err)
goto out;
de = __f2fs_find_entry(dir, &fname, &page);
- fscrypt_free_filename(&fname);
+ f2fs_free_filename(&fname);
if (!de) {
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
+ err = -ENOENT;
goto out_splice;
}
@@ -549,7 +550,7 @@ out_splice:
#endif
new = d_splice_alias(inode, dentry);
err = PTR_ERR_OR_ZERO(new);
- trace_f2fs_lookup_end(dir, dentry, ino, err);
+ trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err);
return new;
out_iput:
iput(inode);
@@ -564,7 +565,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
struct f2fs_dir_entry *de;
struct page *page;
- int err = -ENOENT;
+ int err;
trace_f2fs_unlink_enter(dir, dentry);
@@ -1287,9 +1288,7 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
const struct inode_operations f2fs_dir_inode_operations = {
@@ -1307,9 +1306,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
.fiemap = f2fs_fiemap,
};
@@ -1317,9 +1314,7 @@ const struct inode_operations f2fs_symlink_inode_operations = {
.get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
const struct inode_operations f2fs_special_inode_operations = {
@@ -1327,7 +1322,5 @@ const struct inode_operations f2fs_special_inode_operations = {
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
-#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
-#endif
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index ecbd6bd14a49..03e24df1c84f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1300,7 +1300,13 @@ static int read_node_page(struct page *page, int op_flags)
}
fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
- return f2fs_submit_page_bio(&fio);
+
+ err = f2fs_submit_page_bio(&fio);
+
+ if (!err)
+ f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE);
+
+ return err;
}
/*
@@ -1514,8 +1520,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
trace_f2fs_writepage(page, NODE);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ unlock_page(page);
+ return 0;
+ }
goto redirty_out;
+ }
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
@@ -1801,6 +1814,53 @@ static bool flush_dirty_inode(struct page *page)
return true;
}
+int f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
+{
+ pgoff_t index = 0;
+ struct pagevec pvec;
+ int nr_pages;
+ int ret = 0;
+
+ pagevec_init(&pvec);
+
+ while ((nr_pages = pagevec_lookup_tag(&pvec,
+ NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
+ int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ if (!IS_DNODE(page))
+ continue;
+
+ lock_page(page);
+
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ /* flush inline_data, if it's async context. */
+ if (is_inline_node(page)) {
+ clear_inline_node(page);
+ unlock_page(page);
+ flush_inline_data(sbi, ino_of_node(page));
+ continue;
+ }
+ unlock_page(page);
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ return ret;
+}
+
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
@@ -1864,8 +1924,8 @@ continue_unlock:
goto continue_unlock;
}
- /* flush inline_data */
- if (is_inline_node(page)) {
+ /* flush inline_data, if it's async context. */
+ if (do_balance && is_inline_node(page)) {
clear_inline_node(page);
unlock_page(page);
flush_inline_data(sbi, ino_of_node(page));
@@ -2482,7 +2542,6 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct free_nid *i, *next;
int nr = nr_shrink;
if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
@@ -2491,17 +2550,23 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
if (!mutex_trylock(&nm_i->build_lock))
return 0;
- spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
- if (nr_shrink <= 0 ||
- nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
- break;
+ while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
+ struct free_nid *i, *next;
+ unsigned int batch = SHRINK_NID_BATCH_SIZE;
- __remove_free_nid(sbi, i, FREE_NID);
- kmem_cache_free(free_nid_slab, i);
- nr_shrink--;
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
+ if (!nr_shrink || !batch ||
+ nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
+ break;
+ __remove_free_nid(sbi, i, FREE_NID);
+ kmem_cache_free(free_nid_slab, i);
+ nr_shrink--;
+ batch--;
+ }
+ spin_unlock(&nm_i->nid_list_lock);
}
- spin_unlock(&nm_i->nid_list_lock);
+
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
@@ -2928,7 +2993,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
- nm_i->nat_bits = f2fs_kzalloc(sbi,
+ nm_i->nat_bits = f2fs_kvzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
@@ -3061,9 +3126,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
int i;
nm_i->free_nid_bitmap =
- f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
- nm_i->nat_blocks),
- GFP_KERNEL);
+ f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e05af5df5648..69e5859e993c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/node.h
*
@@ -15,6 +15,9 @@
#define FREE_NID_PAGES 8
#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
+/* size of free nid batch when shrinking */
+#define SHRINK_NID_BATCH_SIZE 8
+
#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */
/* maximum readahead size for node during getting data blocks */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index dd804c07eeb0..ae5310f02e7f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -107,13 +107,51 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
kmem_cache_free(fsync_entry_slab, entry);
}
+static int init_recovered_filename(const struct inode *dir,
+ struct f2fs_inode *raw_inode,
+ struct f2fs_filename *fname,
+ struct qstr *usr_fname)
+{
+ int err;
+
+ memset(fname, 0, sizeof(*fname));
+ fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
+ fname->disk_name.name = raw_inode->i_name;
+
+ if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
+ return -ENAMETOOLONG;
+
+ if (!IS_ENCRYPTED(dir)) {
+ usr_fname->name = fname->disk_name.name;
+ usr_fname->len = fname->disk_name.len;
+ fname->usr_fname = usr_fname;
+ }
+
+ /* Compute the hash of the filename */
+ if (IS_CASEFOLDED(dir)) {
+ err = f2fs_init_casefolded_name(dir, fname);
+ if (err)
+ return err;
+ f2fs_hash_filename(dir, fname);
+#ifdef CONFIG_UNICODE
+ /* Case-sensitive match is fine for recovery */
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+ } else {
+ f2fs_hash_filename(dir, fname);
+ }
+ return 0;
+}
+
static int recover_dentry(struct inode *inode, struct page *ipage,
struct list_head *dir_list)
{
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
- struct fscrypt_name fname;
+ struct f2fs_filename fname;
+ struct qstr usr_fname;
struct page *page;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
@@ -132,16 +170,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
}
dir = entry->inode;
-
- memset(&fname, 0, sizeof(struct fscrypt_name));
- fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
- fname.disk_name.name = raw_inode->i_name;
-
- if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
- WARN_ON(1);
- err = -ENAMETOOLONG;
+ err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
+ if (err)
goto out;
- }
retry:
de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index b7a9421472a7..196f31503511 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1029,9 +1029,9 @@ static void f2fs_submit_discard_endio(struct bio *bio)
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
- dc->error = blk_status_to_errno(bio->bi_status);
-
spin_lock_irqsave(&dc->lock, flags);
+ if (!dc->error)
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE;
@@ -1101,7 +1101,6 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
- dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
@@ -1215,12 +1214,14 @@ submit:
len = total_len;
}
- if (!err && len)
+ if (!err && len) {
+ dcc->undiscard_blks -= len;
__update_discard_tree_range(sbi, bdev, lstart, start, len);
+ }
return err;
}
-static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
+static void __insert_discard_tree(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node **insert_p,
@@ -1229,7 +1230,6 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct rb_node **p;
struct rb_node *parent = NULL;
- struct discard_cmd *dc = NULL;
bool leftmost = true;
if (insert_p && insert_parent) {
@@ -1241,12 +1241,8 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
lstart, &leftmost);
do_insert:
- dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+ __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
p, leftmost);
- if (!dc)
- return NULL;
-
- return dc;
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
@@ -1463,6 +1459,8 @@ next:
return issued;
}
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy);
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
@@ -1471,12 +1469,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, issued = 0;
+ int i, issued;
bool io_interrupted = false;
if (dpolicy->timeout)
f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
+retry:
+ issued = 0;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (dpolicy->timeout &&
f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
@@ -1523,6 +1523,11 @@ next:
break;
}
+ if (dpolicy->type == DPOLICY_UMOUNT && issued) {
+ __wait_all_discard_cmd(sbi, dpolicy);
+ goto retry;
+ }
+
if (!issued && io_interrupted)
issued = -1;
@@ -3102,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
type = CURSEG_COLD_DATA;
}
+ /*
+ * We need to wait for node_write to avoid block allocation during
+ * checkpoint. This can only happen to quota writes which can cause
+ * the below discard race condition.
+ */
+ if (IS_DATASEG(type))
+ down_write(&sbi->node_write);
+
down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
@@ -3167,6 +3180,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
up_read(&SM_I(sbi)->curseg_lock);
+ if (IS_DATASEG(type))
+ up_write(&sbi->node_write);
+
if (put_pin_sem)
up_read(&sbi->pin_sem);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7a83bd530812..cba16cca5189 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/segment.h
*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8a9955902d84..20e56b0fa46a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -285,6 +285,22 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
+static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
+{
+ if (!F2FS_OPTION(sbi).unusable_cap_perc)
+ return;
+
+ if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
+ F2FS_OPTION(sbi).unusable_cap = sbi->user_block_count;
+ else
+ F2FS_OPTION(sbi).unusable_cap = (sbi->user_block_count / 100) *
+ F2FS_OPTION(sbi).unusable_cap_perc;
+
+ f2fs_info(sbi, "Adjust unusable cap for checkpoint=disable = %u / %u%%",
+ F2FS_OPTION(sbi).unusable_cap,
+ F2FS_OPTION(sbi).unusable_cap_perc);
+}
+
static void init_once(void *foo)
{
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -471,11 +487,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
- if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
+ if (!strcmp(name, "on")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
- } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
+ } else if (!strcmp(name, "off")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
- } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
+ } else if (!strcmp(name, "sync")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
} else {
kvfree(name);
@@ -635,16 +651,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
- if (strlen(name) == 8 &&
- !strncmp(name, "adaptive", 8)) {
+ if (!strcmp(name, "adaptive")) {
if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
kvfree(name);
return -EINVAL;
}
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
- } else if (strlen(name) == 3 &&
- !strncmp(name, "lfs", 3)) {
+ } else if (!strcmp(name, "lfs")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else {
kvfree(name);
@@ -769,14 +783,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 10 &&
- !strncmp(name, "user-based", 10)) {
+ if (!strcmp(name, "user-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
- } else if (strlen(name) == 3 &&
- !strncmp(name, "off", 3)) {
+ } else if (!strcmp(name, "off")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
- } else if (strlen(name) == 8 &&
- !strncmp(name, "fs-based", 8)) {
+ } else if (!strcmp(name, "fs-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
kvfree(name);
@@ -789,11 +800,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
- if (strlen(name) == 7 &&
- !strncmp(name, "default", 7)) {
+ if (!strcmp(name, "default")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
- } else if (strlen(name) == 5 &&
- !strncmp(name, "reuse", 5)) {
+ } else if (!strcmp(name, "reuse")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
kvfree(name);
@@ -805,14 +814,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 5 &&
- !strncmp(name, "posix", 5)) {
+ if (!strcmp(name, "posix")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- } else if (strlen(name) == 6 &&
- !strncmp(name, "strict", 6)) {
+ } else if (!strcmp(name, "strict")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
- } else if (strlen(name) == 9 &&
- !strncmp(name, "nobarrier", 9)) {
+ } else if (!strcmp(name, "nobarrier")) {
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
@@ -832,12 +838,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
if (arg < 0 || arg > 100)
return -EINVAL;
- if (arg == 100)
- F2FS_OPTION(sbi).unusable_cap =
- sbi->user_block_count;
- else
- F2FS_OPTION(sbi).unusable_cap =
- (sbi->user_block_count / 100) * arg;
+ F2FS_OPTION(sbi).unusable_cap_perc = arg;
set_opt(sbi, DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable_cap:
@@ -860,17 +861,18 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
- if (strlen(name) == 3 && !strcmp(name, "lzo")) {
+ if (!strcmp(name, "lzo")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZO;
- } else if (strlen(name) == 3 &&
- !strcmp(name, "lz4")) {
+ } else if (!strcmp(name, "lz4")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZ4;
- } else if (strlen(name) == 4 &&
- !strcmp(name, "zstd")) {
+ } else if (!strcmp(name, "zstd")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_ZSTD;
+ } else if (!strcmp(name, "lzo-rle")) {
+ F2FS_OPTION(sbi).compress_algorithm =
+ COMPRESS_LZORLE;
} else {
kfree(name);
return -EINVAL;
@@ -1330,7 +1332,8 @@ static int f2fs_statfs_project(struct super_block *sb,
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) {
- curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ curblock = (dquot->dq_dqb.dqb_curspace +
+ dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
buf->f_blocks = limit;
buf->f_bfree = buf->f_bavail =
(buf->f_blocks > curblock) ?
@@ -1465,6 +1468,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
case COMPRESS_ZSTD:
algtype = "zstd";
break;
+ case COMPRESS_LZORLE:
+ algtype = "lzo-rle";
+ break;
}
seq_printf(seq, ",compress_algorithm=%s", algtype);
@@ -1880,6 +1886,7 @@ skip:
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
+ adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
restore_gc:
@@ -3062,7 +3069,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi,
BITS_TO_LONGS(FDEV(devi).nr_blkz)
* sizeof(unsigned long),
GFP_KERNEL);
@@ -3449,7 +3456,6 @@ try_onemore:
init_rwsem(&sbi->gc_lock);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
- mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -3460,6 +3466,7 @@ try_onemore:
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
sbi->iostat_enable = false;
+ sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1: NR_TEMP_TYPE;
@@ -3557,6 +3564,7 @@ try_onemore:
sbi->reserved_blocks = 0;
sbi->current_reserved_blocks = 0;
limit_reserve_root(sbi);
+ adjust_unusable_cap_perc(sbi);
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -3927,7 +3935,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_bioset();
if (err)
goto free_bio_enrty_cache;
+ err = f2fs_init_compress_mempool();
+ if (err)
+ goto free_bioset;
return 0;
+free_bioset:
+ f2fs_destroy_bioset();
free_bio_enrty_cache:
f2fs_destroy_bio_entry_cache();
free_post_read:
@@ -3955,6 +3968,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
f2fs_destroy_post_read_processing();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3162f46b3c9b..e877c59b9fdb 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -15,6 +15,7 @@
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
+#include <trace/events/f2fs.h>
static struct proc_dir_entry *f2fs_proc_root;
@@ -372,7 +373,6 @@ out:
return count;
}
-
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
if (!sbi->iostat_enable)
@@ -380,6 +380,15 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "iostat_period_ms")) {
+ if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
+ return -EINVAL;
+ spin_lock(&sbi->iostat_lock);
+ sbi->iostat_period_ms = (unsigned int)t;
+ spin_unlock(&sbi->iostat_lock);
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -538,6 +547,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
@@ -545,6 +555,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
@@ -618,6 +630,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
+ ATTR_LIST(iostat_period_ms),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
ATTR_LIST(extension_list),
@@ -625,6 +638,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
+ ATTR_LIST(data_io_flag),
+ ATTR_LIST(node_io_flag),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),
ATTR_LIST(unusable),
@@ -754,6 +769,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
return 0;
}
+void f2fs_record_iostat(struct f2fs_sb_info *sbi)
+{
+ unsigned long long iostat_diff[NR_IO_TYPE];
+ int i;
+
+ if (time_is_after_jiffies(sbi->iostat_next_period))
+ return;
+
+ /* Need double check under the lock */
+ spin_lock(&sbi->iostat_lock);
+ if (time_is_after_jiffies(sbi->iostat_next_period)) {
+ spin_unlock(&sbi->iostat_lock);
+ return;
+ }
+ sbi->iostat_next_period = jiffies +
+ msecs_to_jiffies(sbi->iostat_period_ms);
+
+ for (i = 0; i < NR_IO_TYPE; i++) {
+ iostat_diff[i] = sbi->rw_iostat[i] -
+ sbi->prev_rw_iostat[i];
+ sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
+ }
+ spin_unlock(&sbi->iostat_lock);
+
+ trace_f2fs_iostat(sbi, iostat_diff);
+}
+
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
@@ -766,33 +808,58 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
seq_printf(seq, "time: %-16llu\n", now);
- /* print app IOs */
+ /* print app write IOs */
+ seq_puts(seq, "[WRITE]\n");
seq_printf(seq, "app buffered: %-16llu\n",
- sbi->write_iostat[APP_BUFFERED_IO]);
+ sbi->rw_iostat[APP_BUFFERED_IO]);
seq_printf(seq, "app direct: %-16llu\n",
- sbi->write_iostat[APP_DIRECT_IO]);
+ sbi->rw_iostat[APP_DIRECT_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
- sbi->write_iostat[APP_MAPPED_IO]);
+ sbi->rw_iostat[APP_MAPPED_IO]);
- /* print fs IOs */
+ /* print fs write IOs */
seq_printf(seq, "fs data: %-16llu\n",
- sbi->write_iostat[FS_DATA_IO]);
+ sbi->rw_iostat[FS_DATA_IO]);
seq_printf(seq, "fs node: %-16llu\n",
- sbi->write_iostat[FS_NODE_IO]);
+ sbi->rw_iostat[FS_NODE_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
- sbi->write_iostat[FS_META_IO]);
+ sbi->rw_iostat[FS_META_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
- sbi->write_iostat[FS_GC_DATA_IO]);
+ sbi->rw_iostat[FS_GC_DATA_IO]);
seq_printf(seq, "fs gc node: %-16llu\n",
- sbi->write_iostat[FS_GC_NODE_IO]);
+ sbi->rw_iostat[FS_GC_NODE_IO]);
seq_printf(seq, "fs cp data: %-16llu\n",
- sbi->write_iostat[FS_CP_DATA_IO]);
+ sbi->rw_iostat[FS_CP_DATA_IO]);
seq_printf(seq, "fs cp node: %-16llu\n",
- sbi->write_iostat[FS_CP_NODE_IO]);
+ sbi->rw_iostat[FS_CP_NODE_IO]);
seq_printf(seq, "fs cp meta: %-16llu\n",
- sbi->write_iostat[FS_CP_META_IO]);
+ sbi->rw_iostat[FS_CP_META_IO]);
+
+ /* print app read IOs */
+ seq_puts(seq, "[READ]\n");
+ seq_printf(seq, "app buffered: %-16llu\n",
+ sbi->rw_iostat[APP_BUFFERED_READ_IO]);
+ seq_printf(seq, "app direct: %-16llu\n",
+ sbi->rw_iostat[APP_DIRECT_READ_IO]);
+ seq_printf(seq, "app mapped: %-16llu\n",
+ sbi->rw_iostat[APP_MAPPED_READ_IO]);
+
+ /* print fs read IOs */
+ seq_printf(seq, "fs data: %-16llu\n",
+ sbi->rw_iostat[FS_DATA_READ_IO]);
+ seq_printf(seq, "fs gc data: %-16llu\n",
+ sbi->rw_iostat[FS_GDATA_READ_IO]);
+ seq_printf(seq, "fs compr_data: %-16llu\n",
+ sbi->rw_iostat[FS_CDATA_READ_IO]);
+ seq_printf(seq, "fs node: %-16llu\n",
+ sbi->rw_iostat[FS_NODE_READ_IO]);
+ seq_printf(seq, "fs meta: %-16llu\n",
+ sbi->rw_iostat[FS_META_READ_IO]);
+
+ /* print other IOs */
+ seq_puts(seq, "[OTHER]\n");
seq_printf(seq, "fs discard: %-16llu\n",
- sbi->write_iostat[FS_DISCARD]);
+ sbi->rw_iostat[FS_DISCARD]);
return 0;
}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index e8075fc5b228..789f6aa727fc 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* f2fs IO tracer
*
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 938fcd20565d..416d652774a3 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/xattr.h
*
@@ -136,6 +136,7 @@ extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *);
#else
#define f2fs_xattr_handlers NULL
+#define f2fs_listxattr NULL
static inline int f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *page, int flags)
@@ -148,11 +149,6 @@ static inline int f2fs_getxattr(struct inode *inode, int index,
{
return -EOPNOTSUPP;
}
-static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
- size_t buffer_size)
-{
- return -EOPNOTSUPP;
-}
static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { }
#endif
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 24c2557c37f0..8639ab962a71 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -50,6 +50,7 @@ TRACE_DEFINE_ENUM(CP_RECOVERY);
TRACE_DEFINE_ENUM(CP_DISCARD);
TRACE_DEFINE_ENUM(CP_TRIMMED);
TRACE_DEFINE_ENUM(CP_PAUSE);
+TRACE_DEFINE_ENUM(CP_RESIZE);
#define show_block_type(type) \
__print_symbolic(type, \
@@ -126,7 +127,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE);
{ CP_RECOVERY, "Recovery" }, \
{ CP_DISCARD, "Discard" }, \
{ CP_PAUSE, "Pause" }, \
- { CP_TRIMMED, "Trimmed" })
+ { CP_TRIMMED, "Trimmed" }, \
+ { CP_RESIZE, "Resize" })
#define show_fsync_cpreason(type) \
__print_symbolic(type, \
@@ -154,7 +156,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE);
__print_symbolic(type, \
{ COMPRESS_LZO, "LZO" }, \
{ COMPRESS_LZ4, "LZ4" }, \
- { COMPRESS_ZSTD, "ZSTD" })
+ { COMPRESS_ZSTD, "ZSTD" }, \
+ { COMPRESS_LZORLE, "LZO-RLE" })
struct f2fs_sb_info;
struct f2fs_io_info;
@@ -1812,6 +1815,82 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
TP_ARGS(inode, cluster_idx, compressed_size, ret)
);
+TRACE_EVENT(f2fs_iostat,
+
+ TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat),
+
+ TP_ARGS(sbi, iostat),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, app_dio)
+ __field(unsigned long long, app_bio)
+ __field(unsigned long long, app_wio)
+ __field(unsigned long long, app_mio)
+ __field(unsigned long long, fs_dio)
+ __field(unsigned long long, fs_nio)
+ __field(unsigned long long, fs_mio)
+ __field(unsigned long long, fs_gc_dio)
+ __field(unsigned long long, fs_gc_nio)
+ __field(unsigned long long, fs_cp_dio)
+ __field(unsigned long long, fs_cp_nio)
+ __field(unsigned long long, fs_cp_mio)
+ __field(unsigned long long, app_drio)
+ __field(unsigned long long, app_brio)
+ __field(unsigned long long, app_rio)
+ __field(unsigned long long, app_mrio)
+ __field(unsigned long long, fs_drio)
+ __field(unsigned long long, fs_gdrio)
+ __field(unsigned long long, fs_cdrio)
+ __field(unsigned long long, fs_nrio)
+ __field(unsigned long long, fs_mrio)
+ __field(unsigned long long, fs_discard)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sbi->sb->s_dev;
+ __entry->app_dio = iostat[APP_DIRECT_IO];
+ __entry->app_bio = iostat[APP_BUFFERED_IO];
+ __entry->app_wio = iostat[APP_WRITE_IO];
+ __entry->app_mio = iostat[APP_MAPPED_IO];
+ __entry->fs_dio = iostat[FS_DATA_IO];
+ __entry->fs_nio = iostat[FS_NODE_IO];
+ __entry->fs_mio = iostat[FS_META_IO];
+ __entry->fs_gc_dio = iostat[FS_GC_DATA_IO];
+ __entry->fs_gc_nio = iostat[FS_GC_NODE_IO];
+ __entry->fs_cp_dio = iostat[FS_CP_DATA_IO];
+ __entry->fs_cp_nio = iostat[FS_CP_NODE_IO];
+ __entry->fs_cp_mio = iostat[FS_CP_META_IO];
+ __entry->app_drio = iostat[APP_DIRECT_READ_IO];
+ __entry->app_brio = iostat[APP_BUFFERED_READ_IO];
+ __entry->app_rio = iostat[APP_READ_IO];
+ __entry->app_mrio = iostat[APP_MAPPED_READ_IO];
+ __entry->fs_drio = iostat[FS_DATA_READ_IO];
+ __entry->fs_gdrio = iostat[FS_GDATA_READ_IO];
+ __entry->fs_cdrio = iostat[FS_CDATA_READ_IO];
+ __entry->fs_nrio = iostat[FS_NODE_READ_IO];
+ __entry->fs_mrio = iostat[FS_META_READ_IO];
+ __entry->fs_discard = iostat[FS_DISCARD];
+ ),
+
+ TP_printk("dev = (%d,%d), "
+ "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
+ "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], "
+ "gc [data=%llu, node=%llu], "
+ "cp [data=%llu, node=%llu, meta=%llu], "
+ "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
+ "fs [data=%llu, (gc_data=%llu, compr_data=%llu), "
+ "node=%llu, meta=%llu]",
+ show_dev(__entry->dev), __entry->app_wio, __entry->app_dio,
+ __entry->app_bio, __entry->app_mio, __entry->fs_dio,
+ __entry->fs_nio, __entry->fs_mio, __entry->fs_discard,
+ __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio,
+ __entry->fs_cp_nio, __entry->fs_cp_mio,
+ __entry->app_rio, __entry->app_drio, __entry->app_brio,
+ __entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio,
+ __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
+);
+
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */