author     Linus Torvalds <torvalds@linux-foundation.org>   2024-07-17 12:10:37 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2024-07-17 12:10:37 -0700
commit     586f14a6a182bbdb9404dc66464dcd8d0ac175a3
tree       da5fde6b79a83b11c764b60901e791cbc4e1a458 /fs
parent     586a7a854234b0a48b0e188ad11896fd2764174f
parent     a3c10bed330b7ab401254a0c91098a03b04f1448
Merge tag 'erofs-for-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"Updates for folio conversions for compressed inodes: While large folio
support for compressed data could work now, it remains disabled since
the stress test could hang due to page migration in a few hours after
enabling it. I need more time to investigate further before enabling
this feature.
Additionally, the stream decompressors and tracepoints have been
cleaned up for simplicity.
Summary:
- More folio conversions for compressed inodes
- Stream decompressor (LZMA/DEFLATE/ZSTD) cleanups
- Minor tracepoint cleanup"
* tag 'erofs-for-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: silence uninitialized variable warning in z_erofs_scan_folio()
erofs: avoid refcounting short-lived pages
erofs: get rid of z_erofs_map_blocks_iter_* tracepoints
erofs: tidy up stream decompressors
erofs: refine z_erofs_{init,exit}_subsystem()
erofs: move each decompressor to its own source file
erofs: tidy up `struct z_erofs_bvec`
erofs: teach z_erofs_scan_folios() to handle multi-page folios
erofs: convert z_erofs_read_fragment() to folios
erofs: convert z_erofs_pcluster_readmore() to folios
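The decompressor cleanup in the diff below turns each algorithm into a const ops table (z_erofs_lzma_decomp, z_erofs_deflate_decomp, z_erofs_zstd_decomp) with .init/.exit hooks, collected in the sparse z_erofs_decomp[] array that z_erofs_init_decompressor() and z_erofs_exit_decompressor() walk. Below is a minimal, self-contained userspace sketch of that registration pattern; the struct and function names here are illustrative stand-ins, not the kernel code, and the rollback loop is simplified.

/* Standalone sketch of the ops-table + init/exit rollback pattern; cc sketch.c */
#include <stdio.h>

struct decompressor {
	int (*init)(void);
	void (*exit)(void);
	const char *name;
};

static int lz4_init(void)   { puts("lz4 init");  return 0; }
static void lz4_exit(void)  { puts("lz4 exit"); }
static int zstd_init(void)  { puts("zstd init"); return 0; }
static void zstd_exit(void) { puts("zstd exit"); }

static const struct decompressor lz4_decomp = {
	.init = lz4_init, .exit = lz4_exit, .name = "lz4",
};
static const struct decompressor zstd_decomp = {
	.init = zstd_init, .exit = zstd_exit, .name = "zstd",
};

/* sparse table indexed by on-disk algorithm id; disabled algorithms stay NULL */
static const struct decompressor *decomp[] = { &lz4_decomp, NULL, &zstd_decomp };
#define NR_DECOMP (int)(sizeof(decomp) / sizeof(decomp[0]))

static int init_decompressors(void)
{
	int i, err;

	for (i = 0; i < NR_DECOMP; ++i) {
		err = decomp[i] && decomp[i]->init ? decomp[i]->init() : 0;
		if (err) {
			while (--i >= 0)	/* roll back the ones already set up */
				if (decomp[i] && decomp[i]->exit)
					decomp[i]->exit();
			return err;
		}
	}
	return 0;
}

static void exit_decompressors(void)
{
	int i;

	for (i = 0; i < NR_DECOMP; ++i)
		if (decomp[i] && decomp[i]->exit)
			decomp[i]->exit();
}

int main(void)
{
	if (init_decompressors())
		return 1;
	exit_decompressors();
	return 0;
}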
Diffstat (limited to 'fs')
-rw-r--r--   fs/erofs/compress.h             |  61
-rw-r--r--   fs/erofs/decompressor.c         | 148
-rw-r--r--   fs/erofs/decompressor_deflate.c | 149
-rw-r--r--   fs/erofs/decompressor_lzma.c    | 166
-rw-r--r--   fs/erofs/decompressor_zstd.c    | 154
-rw-r--r--   fs/erofs/internal.h             |  48
-rw-r--r--   fs/erofs/super.c                |  34
-rw-r--r--   fs/erofs/zdata.c                | 346
-rw-r--r--   fs/erofs/zmap.c                 |   4
9 files changed, 493 insertions(+), 617 deletions(-)
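Most of the churn in decompressor_{deflate,lzma,zstd}.c below comes from replacing three near-identical multi-call loops (refill the next input page, switch to the next output page, bounce-buffer overlapping in-place pages) with the shared z_erofs_stream_switch_bufs() helper. As background for reading that part of the diff, here is a hedged userspace sketch of the basic multi-call technique using regular zlib rather than the kernel's zlib_inflate; CHUNK, inflate_stream() and the demo in main() are illustrative assumptions, not erofs code.

/* Userspace illustration only; build with: cc sketch.c -lz */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define CHUNK 4096	/* stands in for PAGE_SIZE */

/* Inflate a raw DEFLATE buffer in CHUNK-sized output steps. */
static int inflate_stream(const unsigned char *src, size_t srclen)
{
	unsigned char out[CHUNK];
	z_stream z = { 0 };
	int ret;

	/* -MAX_WBITS: raw DEFLATE data without a zlib header */
	if (inflateInit2(&z, -MAX_WBITS) != Z_OK)
		return -1;
	z.next_in = (unsigned char *)src;
	z.avail_in = srclen;
	do {
		/* switch to a fresh output buffer before each call; in the
		 * kernel this is where z_erofs_stream_switch_bufs() maps the
		 * next output page and refills the input page if needed */
		z.next_out = out;
		z.avail_out = CHUNK;
		ret = inflate(&z, Z_SYNC_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;
		fwrite(out, 1, CHUNK - z.avail_out, stdout);
	} while (ret != Z_STREAM_END);
	inflateEnd(&z);
	return ret == Z_STREAM_END ? 0 : -1;
}

int main(void)
{
	const char *msg = "erofs multi-call stream decompression sketch\n";
	unsigned char comp[CHUNK];
	z_stream d = { 0 };
	size_t clen;

	/* produce a raw DEFLATE stream to feed the decoder above */
	if (deflateInit2(&d, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
			 8, Z_DEFAULT_STRATEGY) != Z_OK)
		return 1;
	d.next_in = (unsigned char *)msg;
	d.avail_in = strlen(msg);
	d.next_out = comp;
	d.avail_out = sizeof(comp);
	deflate(&d, Z_FINISH);
	clen = sizeof(comp) - d.avail_out;
	deflateEnd(&d);

	return inflate_stream(comp, clen) ? 1 : 0;
}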
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 19d53c30c8af..7bfe251680ec 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -24,6 +24,8 @@ struct z_erofs_decompressor { void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); + int (*init)(void); + void (*exit)(void); char *name; }; @@ -52,17 +54,14 @@ struct z_erofs_decompressor { */ /* - * short-lived pages are pages directly from buddy system with specific - * page->private (no need to set PagePrivate since these are non-LRU / - * non-movable pages and bypass reclaim / migration code). + * Currently, short-lived pages are pages directly from buddy system + * with specific page->private (Z_EROFS_SHORTLIVED_PAGE). + * In the future world of Memdescs, it should be type 0 (Misc) memory + * which type can be checked with a new helper. */ static inline bool z_erofs_is_shortlived_page(struct page *page) { - if (page->private != Z_EROFS_SHORTLIVED_PAGE) - return false; - - DBG_BUGON(page->mapping); - return true; + return page->private == Z_EROFS_SHORTLIVED_PAGE; } static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, @@ -70,32 +69,32 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, { if (!z_erofs_is_shortlived_page(page)) return false; - - /* short-lived pages should not be used by others at the same time */ - if (page_ref_count(page) > 1) { - put_page(page); - } else { - /* follow the pcluster rule above. */ - erofs_pagepool_add(pagepool, page); - } + erofs_pagepool_add(pagepool, page); return true; } +extern const struct z_erofs_decompressor z_erofs_lzma_decomp; +extern const struct z_erofs_decompressor z_erofs_deflate_decomp; +extern const struct z_erofs_decompressor z_erofs_zstd_decomp; +extern const struct z_erofs_decompressor *z_erofs_decomp[]; + +struct z_erofs_stream_dctx { + struct z_erofs_decompress_req *rq; + unsigned int inpages, outpages; /* # of {en,de}coded pages */ + int no, ni; /* the current {en,de}coded page # */ + + unsigned int avail_out; /* remaining bytes in the decoded buffer */ + unsigned int inbuf_pos, inbuf_sz; + /* current status of the encoded buffer */ + u8 *kin, *kout; /* buffer mapped pointers */ + void *bounce; /* bounce buffer for inplace I/Os */ + bool bounced; /* is the bounce buffer used now? 
*/ +}; + +int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst, + void **src, struct page **pgpl); int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); -extern const struct z_erofs_decompressor erofs_decompressors[]; - -/* prototypes for specific algorithms */ -int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, void *data, int size); -int z_erofs_load_deflate_config(struct super_block *sb, - struct erofs_super_block *dsb, void *data, int size); -int z_erofs_load_zstd_config(struct super_block *sb, - struct erofs_super_block *dsb, void *data, int size); -int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool); -int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool); -int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, - struct page **pgpl); +int __init z_erofs_init_decompressor(void); +void z_erofs_exit_decompressor(void); #endif diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 9d85b6c11c6b..c2253b6a5416 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2019 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2024 Alibaba Cloud */ #include "compress.h" #include <linux/lz4.h> @@ -109,7 +110,6 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, if (top) { victim = availables[--top]; - get_page(victim); } else { victim = __erofs_allocpage(pagepool, rq->gfp, true); if (!victim) @@ -371,40 +371,113 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, return 0; } -const struct z_erofs_decompressor erofs_decompressors[] = { - [Z_EROFS_COMPRESSION_SHIFTED] = { +int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst, + void **src, struct page **pgpl) +{ + struct z_erofs_decompress_req *rq = dctx->rq; + struct super_block *sb = rq->sb; + struct page **pgo, *tmppage; + unsigned int j; + + if (!dctx->avail_out) { + if (++dctx->no >= dctx->outpages || !rq->outputsize) { + erofs_err(sb, "insufficient space for decompressed data"); + return -EFSCORRUPTED; + } + + if (dctx->kout) + kunmap_local(dctx->kout); + dctx->avail_out = min(rq->outputsize, PAGE_SIZE - rq->pageofs_out); + rq->outputsize -= dctx->avail_out; + pgo = &rq->out[dctx->no]; + if (!*pgo && rq->fillgaps) { /* deduped */ + *pgo = erofs_allocpage(pgpl, rq->gfp); + if (!*pgo) { + dctx->kout = NULL; + return -ENOMEM; + } + set_page_private(*pgo, Z_EROFS_SHORTLIVED_PAGE); + } + if (*pgo) { + dctx->kout = kmap_local_page(*pgo); + *dst = dctx->kout + rq->pageofs_out; + } else { + *dst = dctx->kout = NULL; + } + rq->pageofs_out = 0; + } + + if (dctx->inbuf_pos == dctx->inbuf_sz && rq->inputsize) { + if (++dctx->ni >= dctx->inpages) { + erofs_err(sb, "invalid compressed data"); + return -EFSCORRUPTED; + } + if (dctx->kout) /* unlike kmap(), take care of the orders */ + kunmap_local(dctx->kout); + kunmap_local(dctx->kin); + + dctx->inbuf_sz = min_t(u32, rq->inputsize, PAGE_SIZE); + rq->inputsize -= dctx->inbuf_sz; + dctx->kin = kmap_local_page(rq->in[dctx->ni]); + *src = dctx->kin; + dctx->bounced = false; + if (dctx->kout) { + j = (u8 *)*dst - dctx->kout; + dctx->kout = kmap_local_page(rq->out[dctx->no]); + *dst = dctx->kout + j; + } + dctx->inbuf_pos = 0; + } + + /* + * Handle overlapping: Use the given bounce buffer if the input data is + * under processing; Or utilize short-lived pages from the on-stack page + * 
pool, where pages are shared among the same request. Note that only + * a few inplace I/O pages need to be doubled. + */ + if (!dctx->bounced && rq->out[dctx->no] == rq->in[dctx->ni]) { + memcpy(dctx->bounce, *src, dctx->inbuf_sz); + *src = dctx->bounce; + dctx->bounced = true; + } + + for (j = dctx->ni + 1; j < dctx->inpages; ++j) { + if (rq->out[dctx->no] != rq->in[j]) + continue; + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) + return -ENOMEM; + set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); + copy_highpage(tmppage, rq->in[j]); + rq->in[j] = tmppage; + } + return 0; +} + +const struct z_erofs_decompressor *z_erofs_decomp[] = { + [Z_EROFS_COMPRESSION_SHIFTED] = &(const struct z_erofs_decompressor) { .decompress = z_erofs_transform_plain, .name = "shifted" }, - [Z_EROFS_COMPRESSION_INTERLACED] = { + [Z_EROFS_COMPRESSION_INTERLACED] = &(const struct z_erofs_decompressor) { .decompress = z_erofs_transform_plain, .name = "interlaced" }, - [Z_EROFS_COMPRESSION_LZ4] = { + [Z_EROFS_COMPRESSION_LZ4] = &(const struct z_erofs_decompressor) { .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, + .init = z_erofs_gbuf_init, + .exit = z_erofs_gbuf_exit, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA - [Z_EROFS_COMPRESSION_LZMA] = { - .config = z_erofs_load_lzma_config, - .decompress = z_erofs_lzma_decompress, - .name = "lzma" - }, + [Z_EROFS_COMPRESSION_LZMA] = &z_erofs_lzma_decomp, #endif #ifdef CONFIG_EROFS_FS_ZIP_DEFLATE - [Z_EROFS_COMPRESSION_DEFLATE] = { - .config = z_erofs_load_deflate_config, - .decompress = z_erofs_deflate_decompress, - .name = "deflate" - }, + [Z_EROFS_COMPRESSION_DEFLATE] = &z_erofs_deflate_decomp, #endif #ifdef CONFIG_EROFS_FS_ZIP_ZSTD - [Z_EROFS_COMPRESSION_ZSTD] = { - .config = z_erofs_load_zstd_config, - .decompress = z_erofs_zstd_decompress, - .name = "zstd" - }, + [Z_EROFS_COMPRESSION_ZSTD] = &z_erofs_zstd_decomp, #endif }; @@ -432,6 +505,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) offset = EROFS_SUPER_OFFSET + sbi->sb_size; alg = 0; for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + const struct z_erofs_decompressor *dec = z_erofs_decomp[alg]; void *data; if (!(algs & 1)) @@ -443,16 +517,13 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) break; } - if (alg >= ARRAY_SIZE(erofs_decompressors) || - !erofs_decompressors[alg].config) { + if (alg < Z_EROFS_COMPRESSION_MAX && dec && dec->config) { + ret = dec->config(sb, dsb, data, size); + } else { erofs_err(sb, "algorithm %d isn't enabled on this kernel", alg); ret = -EOPNOTSUPP; - } else { - ret = erofs_decompressors[alg].config(sb, - dsb, data, size); } - kfree(data); if (ret) break; @@ -460,3 +531,28 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) erofs_put_metabuf(&buf); return ret; } + +int __init z_erofs_init_decompressor(void) +{ + int i, err; + + for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) { + err = z_erofs_decomp[i] ? 
z_erofs_decomp[i]->init() : 0; + if (err) { + while (--i) + if (z_erofs_decomp[i]) + z_erofs_decomp[i]->exit(); + return err; + } + } + return 0; +} + +void z_erofs_exit_decompressor(void) +{ + int i; + + for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) + if (z_erofs_decomp[i]) + z_erofs_decomp[i]->exit(); +} diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 3a3461561a3c..5070d2fcc737 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -15,7 +15,7 @@ static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq); module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444); -void z_erofs_deflate_exit(void) +static void z_erofs_deflate_exit(void) { /* there should be no running fs instance */ while (z_erofs_deflate_avail_strms) { @@ -41,7 +41,7 @@ void z_erofs_deflate_exit(void) } } -int __init z_erofs_deflate_init(void) +static int __init z_erofs_deflate_init(void) { /* by default, use # of possible CPUs instead */ if (!z_erofs_deflate_nstrms) @@ -49,7 +49,7 @@ int __init z_erofs_deflate_init(void) return 0; } -int z_erofs_load_deflate_config(struct super_block *sb, +static int z_erofs_load_deflate_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { struct z_erofs_deflate_cfgs *dfl = data; @@ -97,27 +97,26 @@ failed: return -ENOMEM; } -int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pgpl) +static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) { - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int nrpages_in = - PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; struct super_block *sb = rq->sb; - unsigned int insz, outsz, pofs; + struct z_erofs_stream_dctx dctx = { + .rq = rq, + .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, + .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) + >> PAGE_SHIFT, + .no = -1, .ni = 0, + }; struct z_erofs_deflate *strm; - u8 *kin, *kout = NULL; - bool bounced = false; - int no = -1, ni = 0, j = 0, zerr, err; + int zerr, err; /* 1. get the exact DEFLATE compressed size */ - kin = kmap_local_page(*rq->in); - err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in, - min_t(unsigned int, rq->inputsize, - sb->s_blocksize - rq->pageofs_in)); + dctx.kin = kmap_local_page(*rq->in); + err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in, + min(rq->inputsize, sb->s_blocksize - rq->pageofs_in)); if (err) { - kunmap_local(kin); + kunmap_local(dctx.kin); return err; } @@ -134,102 +133,35 @@ again: spin_unlock(&z_erofs_deflate_lock); /* 3. 
multi-call decompress */ - insz = rq->inputsize; - outsz = rq->outputsize; zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS); if (zerr != Z_OK) { err = -EIO; goto failed_zinit; } - pofs = rq->pageofs_out; - strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in); - insz -= strm->z.avail_in; - strm->z.next_in = kin + rq->pageofs_in; + rq->fillgaps = true; /* DEFLATE doesn't support NULL output buffer */ + strm->z.avail_in = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in); + rq->inputsize -= strm->z.avail_in; + strm->z.next_in = dctx.kin + rq->pageofs_in; strm->z.avail_out = 0; + dctx.bounce = strm->bounce; while (1) { - if (!strm->z.avail_out) { - if (++no >= nrpages_out || !outsz) { - erofs_err(sb, "insufficient space for decompressed data"); - err = -EFSCORRUPTED; - break; - } - - if (kout) - kunmap_local(kout); - strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs); - outsz -= strm->z.avail_out; - if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pgpl, rq->gfp); - if (!rq->out[no]) { - kout = NULL; - err = -ENOMEM; - break; - } - set_page_private(rq->out[no], - Z_EROFS_SHORTLIVED_PAGE); - } - kout = kmap_local_page(rq->out[no]); - strm->z.next_out = kout + pofs; - pofs = 0; - } - - if (!strm->z.avail_in && insz) { - if (++ni >= nrpages_in) { - erofs_err(sb, "invalid compressed data"); - err = -EFSCORRUPTED; - break; - } - - if (kout) { /* unlike kmap(), take care of the orders */ - j = strm->z.next_out - kout; - kunmap_local(kout); - } - kunmap_local(kin); - strm->z.avail_in = min_t(u32, insz, PAGE_SIZE); - insz -= strm->z.avail_in; - kin = kmap_local_page(rq->in[ni]); - strm->z.next_in = kin; - bounced = false; - if (kout) { - kout = kmap_local_page(rq->out[no]); - strm->z.next_out = kout + j; - } - } - - /* - * Handle overlapping: Use bounced buffer if the compressed - * data is under processing; Or use short-lived pages from the - * on-stack pagepool where pages share among the same request - * and not _all_ inplace I/O pages are needed to be doubled. - */ - if (!bounced && rq->out[no] == rq->in[ni]) { - memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in); - strm->z.next_in = strm->bounce; - bounced = true; - } - - for (j = ni + 1; j < nrpages_in; ++j) { - struct page *tmppage; - - if (rq->out[no] != rq->in[j]) - continue; - tmppage = erofs_allocpage(pgpl, rq->gfp); - if (!tmppage) { - err = -ENOMEM; - goto failed; - } - set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); - copy_highpage(tmppage, rq->in[j]); - rq->in[j] = tmppage; - } + dctx.avail_out = strm->z.avail_out; + dctx.inbuf_sz = strm->z.avail_in; + err = z_erofs_stream_switch_bufs(&dctx, + (void **)&strm->z.next_out, + (void **)&strm->z.next_in, pgpl); + if (err) + break; + strm->z.avail_out = dctx.avail_out; + strm->z.avail_in = dctx.inbuf_sz; zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH); - if (zerr != Z_OK || !(outsz + strm->z.avail_out)) { + if (zerr != Z_OK || !(rq->outputsize + strm->z.avail_out)) { if (zerr == Z_OK && rq->partial_decoding) break; - if (zerr == Z_STREAM_END && !outsz) + if (zerr == Z_STREAM_END && !rq->outputsize) break; erofs_err(sb, "failed to decompress %d in[%u] out[%u]", zerr, rq->inputsize, rq->outputsize); @@ -237,13 +169,12 @@ again: break; } } -failed: if (zlib_inflateEnd(&strm->z) != Z_OK && !err) err = -EIO; - if (kout) - kunmap_local(kout); + if (dctx.kout) + kunmap_local(dctx.kout); failed_zinit: - kunmap_local(kin); + kunmap_local(dctx.kin); /* 4. 
push back DEFLATE stream context to the global list */ spin_lock(&z_erofs_deflate_lock); strm->next = z_erofs_deflate_head; @@ -252,3 +183,11 @@ failed_zinit: wake_up(&z_erofs_deflate_wq); return err; } + +const struct z_erofs_decompressor z_erofs_deflate_decomp = { + .config = z_erofs_load_deflate_config, + .decompress = z_erofs_deflate_decompress, + .init = z_erofs_deflate_init, + .exit = z_erofs_deflate_exit, + .name = "deflate", +}; diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 4b28dc130c9f..06a722b85a45 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -5,7 +5,6 @@ struct z_erofs_lzma { struct z_erofs_lzma *next; struct xz_dec_microlzma *state; - struct xz_buf buf; u8 bounce[PAGE_SIZE]; }; @@ -18,7 +17,7 @@ static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq); module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444); -void z_erofs_lzma_exit(void) +static void z_erofs_lzma_exit(void) { /* there should be no running fs instance */ while (z_erofs_lzma_avail_strms) { @@ -46,7 +45,7 @@ void z_erofs_lzma_exit(void) } } -int __init z_erofs_lzma_init(void) +static int __init z_erofs_lzma_init(void) { unsigned int i; @@ -70,7 +69,7 @@ int __init z_erofs_lzma_init(void) return 0; } -int z_erofs_load_lzma_config(struct super_block *sb, +static int z_erofs_load_lzma_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(lzma_resize_mutex); @@ -147,26 +146,28 @@ again: return err; } -int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pgpl) +static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) { - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int nrpages_in = - PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; - unsigned int inlen, outlen, pageofs; + struct super_block *sb = rq->sb; + struct z_erofs_stream_dctx dctx = { + .rq = rq, + .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, + .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) + >> PAGE_SHIFT, + .no = -1, .ni = 0, + }; + struct xz_buf buf = {}; struct z_erofs_lzma *strm; - u8 *kin; - bool bounced = false; - int no, ni, j, err = 0; + enum xz_ret xz_err; + int err; /* 1. get the exact LZMA compressed size */ - kin = kmap(*rq->in); - err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in, - min_t(unsigned int, rq->inputsize, - rq->sb->s_blocksize - rq->pageofs_in)); + dctx.kin = kmap_local_page(*rq->in); + err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in, + min(rq->inputsize, sb->s_blocksize - rq->pageofs_in)); if (err) { - kunmap(*rq->in); + kunmap_local(dctx.kin); return err; } @@ -183,108 +184,45 @@ again: spin_unlock(&z_erofs_lzma_lock); /* 3. 
multi-call decompress */ - inlen = rq->inputsize; - outlen = rq->outputsize; - xz_dec_microlzma_reset(strm->state, inlen, outlen, + xz_dec_microlzma_reset(strm->state, rq->inputsize, rq->outputsize, !rq->partial_decoding); - pageofs = rq->pageofs_out; - strm->buf.in = kin + rq->pageofs_in; - strm->buf.in_pos = 0; - strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in); - inlen -= strm->buf.in_size; - strm->buf.out = NULL; - strm->buf.out_pos = 0; - strm->buf.out_size = 0; - - for (ni = 0, no = -1;;) { - enum xz_ret xz_err; - - if (strm->buf.out_pos == strm->buf.out_size) { - if (strm->buf.out) { - kunmap(rq->out[no]); - strm->buf.out = NULL; - } - - if (++no >= nrpages_out || !outlen) { - erofs_err(rq->sb, "decompressed buf out of bound"); - err = -EFSCORRUPTED; - break; - } - strm->buf.out_pos = 0; - strm->buf.out_size = min_t(u32, outlen, - PAGE_SIZE - pageofs); - outlen -= strm->buf.out_size; - if (!rq->out[no] && rq->fillgaps) { /* deduped */ - rq->out[no] = erofs_allocpage(pgpl, rq->gfp); - if (!rq->out[no]) { - err = -ENOMEM; - break; - } - set_page_private(rq->out[no], - Z_EROFS_SHORTLIVED_PAGE); - } - if (rq->out[no]) - strm->buf.out = kmap(rq->out[no]) + pageofs; - pageofs = 0; - } else if (strm->buf.in_pos == strm->buf.in_size) { - kunmap(rq->in[ni]); - - if (++ni >= nrpages_in || !inlen) { - erofs_err(rq->sb, "compressed buf out of bound"); - err = -EFSCORRUPTED; - break; - } - strm->buf.in_pos = 0; - strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE); - inlen -= strm->buf.in_size; - kin = kmap(rq->in[ni]); - strm->buf.in = kin; - bounced = false; - } + buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in); + rq->inputsize -= buf.in_size; + buf.in = dctx.kin + rq->pageofs_in, + dctx.bounce = strm->bounce; + do { + dctx.avail_out = buf.out_size - buf.out_pos; + dctx.inbuf_sz = buf.in_size; + dctx.inbuf_pos = buf.in_pos; + err = z_erofs_stream_switch_bufs(&dctx, (void **)&buf.out, + (void **)&buf.in, pgpl); + if (err) + break; - /* - * Handle overlapping: Use bounced buffer if the compressed - * data is under processing; Otherwise, Use short-lived pages - * from the on-stack pagepool where pages share with the same - * request. 
- */ - if (!bounced && rq->out[no] == rq->in[ni]) { - memcpy(strm->bounce, strm->buf.in, strm->buf.in_size); - strm->buf.in = strm->bounce; - bounced = true; + if (buf.out_size == buf.out_pos) { + buf.out_size = dctx.avail_out; + buf.out_pos = 0; } - for (j = ni + 1; j < nrpages_in; ++j) { - struct page *tmppage; + buf.in_size = dctx.inbuf_sz; + buf.in_pos = dctx.inbuf_pos; - if (rq->out[no] != rq->in[j]) - continue; - tmppage = erofs_allocpage(pgpl, rq->gfp); - if (!tmppage) { - err = -ENOMEM; - goto failed; - } - set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); - copy_highpage(tmppage, rq->in[j]); - rq->in[j] = tmppage; - } - xz_err = xz_dec_microlzma_run(strm->state, &strm->buf); - DBG_BUGON(strm->buf.out_pos > strm->buf.out_size); - DBG_BUGON(strm->buf.in_pos > strm->buf.in_size); + xz_err = xz_dec_microlzma_run(strm->state, &buf); + DBG_BUGON(buf.out_pos > buf.out_size); + DBG_BUGON(buf.in_pos > buf.in_size); if (xz_err != XZ_OK) { - if (xz_err == XZ_STREAM_END && !outlen) + if (xz_err == XZ_STREAM_END && !rq->outputsize) break; - erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]", + erofs_err(sb, "failed to decompress %d in[%u] out[%u]", xz_err, rq->inputsize, rq->outputsize); err = -EFSCORRUPTED; break; } - } -failed: - if (no < nrpages_out && strm->buf.out) - kunmap(rq->out[no]); - if (ni < nrpages_in) - kunmap(rq->in[ni]); + } while (1); + + if (dctx.kout) + kunmap_local(dctx.kout); + kunmap_local(dctx.kin); /* 4. push back LZMA stream context to the global list */ spin_lock(&z_erofs_lzma_lock); strm->next = z_erofs_lzma_head; @@ -293,3 +231,11 @@ failed: wake_up(&z_erofs_lzma_wq); return err; } + +const struct z_erofs_decompressor z_erofs_lzma_decomp = { + .config = z_erofs_load_lzma_config, + .decompress = z_erofs_lzma_decompress, + .init = z_erofs_lzma_init, + .exit = z_erofs_lzma_exit, + .name = "lzma" +}; diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c index 63a23cac3af4..7e177304967e 100644 --- a/fs/erofs/decompressor_zstd.c +++ b/fs/erofs/decompressor_zstd.c @@ -34,7 +34,7 @@ again: return strm; } -void z_erofs_zstd_exit(void) +static void z_erofs_zstd_exit(void) { while (z_erofs_zstd_avail_strms) { struct z_erofs_zstd *strm, *n; @@ -49,7 +49,7 @@ void z_erofs_zstd_exit(void) } } -int __init z_erofs_zstd_init(void) +static int __init z_erofs_zstd_init(void) { /* by default, use # of possible CPUs instead */ if (!z_erofs_zstd_nstrms) @@ -72,7 +72,7 @@ int __init z_erofs_zstd_init(void) return 0; } -int z_erofs_load_zstd_config(struct super_block *sb, +static int z_erofs_load_zstd_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(zstd_resize_mutex); @@ -135,30 +135,29 @@ int z_erofs_load_zstd_config(struct super_block *sb, return strm ? 
-ENOMEM : 0; } -int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, - struct page **pgpl) +static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) { - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int nrpages_in = - PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; - zstd_dstream *stream; struct super_block *sb = rq->sb; - unsigned int insz, outsz, pofs; - struct z_erofs_zstd *strm; + struct z_erofs_stream_dctx dctx = { + .rq = rq, + .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, + .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) + >> PAGE_SHIFT, + .no = -1, .ni = 0, + }; zstd_in_buffer in_buf = { NULL, 0, 0 }; zstd_out_buffer out_buf = { NULL, 0, 0 }; - u8 *kin, *kout = NULL; - bool bounced = false; - int no = -1, ni = 0, j = 0, zerr, err; + struct z_erofs_zstd *strm; + zstd_dstream *stream; + int zerr, err; /* 1. get the exact compressed size */ - kin = kmap_local_page(*rq->in); - err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in, - min_t(unsigned int, rq->inputsize, - sb->s_blocksize - rq->pageofs_in)); + dctx.kin = kmap_local_page(*rq->in); + err = z_erofs_fixup_insize(rq, dctx.kin + rq->pageofs_in, + min(rq->inputsize, sb->s_blocksize - rq->pageofs_in)); if (err) { - kunmap_local(kin); + kunmap_local(dctx.kin); return err; } @@ -166,109 +165,48 @@ int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, strm = z_erofs_isolate_strms(false); /* 3. multi-call decompress */ - insz = rq->inputsize; - outsz = rq->outputsize; stream = zstd_init_dstream(z_erofs_zstd_max_dictsize, strm->wksp, strm->wkspsz); if (!stream) { err = -EIO; goto failed_zinit; } - pofs = rq->pageofs_out; - in_buf.size = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in); - insz -= in_buf.size; - in_buf.src = kin + rq->pageofs_in; + rq->fillgaps = true; /* ZSTD doesn't support NULL output buffer */ + in_buf.size = min_t(u32, rq->inputsize, PAGE_SIZE - rq->pageofs_in); + rq->inputsize -= in_buf.size; + in_buf.src = dctx.kin + rq->pageofs_in; + dctx.bounce = strm->bounce; + do { - if (out_buf.size == out_buf.pos) { - if (++no >= nrpages_out || !outsz) { - erofs_err(sb, "insufficient space for decompressed data"); - err = -EFSCORRUPTED; - break; - } + dctx.avail_out = out_buf.size - out_buf.pos; + dctx.inbuf_sz = in_buf.size; + dctx.inbuf_pos = in_buf.pos; + err = z_erofs_stream_switch_bufs(&dctx, &out_buf.dst, + (void **)&in_buf.src, pgpl); + if (err) + break; - if (kout) - kunmap_local(kout); - out_buf.size = min_t(u32, outsz, PAGE_SIZE - pofs); - outsz -= out_buf.size; - if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pgpl, rq->gfp); - if (!rq->out[no]) { - kout = NULL; - err = -ENOMEM; - break; - } - set_page_private(rq->out[no], - Z_EROFS_SHORTLIVED_PAGE); - } - kout = kmap_local_page(rq->out[no]); - out_buf.dst = kout + pofs; + if (out_buf.size == out_buf.pos) { + out_buf.size = dctx.avail_out; out_buf.pos = 0; - pofs = 0; - } - - if (in_buf.size == in_buf.pos && insz) { - if (++ni >= nrpages_in) { - erofs_err(sb, "invalid compressed data"); - err = -EFSCORRUPTED; - break; - } - - if (kout) /* unlike kmap(), take care of the orders */ - kunmap_local(kout); - kunmap_local(kin); - in_buf.size = min_t(u32, insz, PAGE_SIZE); - insz -= in_buf.size; - kin = kmap_local_page(rq->in[ni]); - in_buf.src = kin; - in_buf.pos = 0; - bounced = false; - if (kout) { - j = (u8 *)out_buf.dst - kout; - kout = kmap_local_page(rq->out[no]); - out_buf.dst = kout + j; - } } + in_buf.size = dctx.inbuf_sz; + in_buf.pos = 
dctx.inbuf_pos; - /* - * Handle overlapping: Use bounced buffer if the compressed - * data is under processing; Or use short-lived pages from the - * on-stack pagepool where pages share among the same request - * and not _all_ inplace I/O pages are needed to be doubled. - */ - if (!bounced && rq->out[no] == rq->in[ni]) { - memcpy(strm->bounce, in_buf.src, in_buf.size); - in_buf.src = strm->bounce; - bounced = true; - } - - for (j = ni + 1; j < nrpages_in; ++j) { - struct page *tmppage; - - if (rq->out[no] != rq->in[j]) - continue; - tmppage = erofs_allocpage(pgpl, rq->gfp); - if (!tmppage) { - err = -ENOMEM; - goto failed; - } - set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); - copy_highpage(tmppage, rq->in[j]); - rq->in[j] = tmppage; - } zerr = zstd_decompress_stream(stream, &out_buf, &in_buf); - if (zstd_is_error(zerr) || (!zerr && outsz)) { + if (zstd_is_error(zerr) || (!zerr && rq->outputsize)) { erofs_err(sb, "failed to decompress in[%u] out[%u]: %s", rq->inputsize, rq->outputsize, zerr ? zstd_get_error_name(zerr) : "unexpected end of stream"); err = -EFSCORRUPTED; break; } - } while (outsz || out_buf.pos < out_buf.size); -failed: - if (kout) - kunmap_local(kout); + } while (rq->outputsize || out_buf.pos < out_buf.size); + + if (dctx.kout) + kunmap_local(dctx.kout); failed_zinit: - kunmap_local(kin); + kunmap_local(dctx.kin); /* 4. push back ZSTD stream context to the global list */ spin_lock(&z_erofs_zstd_lock); strm->next = z_erofs_zstd_head; @@ -277,3 +215,11 @@ failed_zinit: wake_up(&z_erofs_zstd_wq); return err; } + +const struct z_erofs_decompressor z_erofs_zstd_decomp = { + .config = z_erofs_load_zstd_config, + .decompress = z_erofs_zstd_decompress, + .init = z_erofs_zstd_init, + .exit = z_erofs_zstd_exit, + .name = "zstd", +}; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 0c1b44ac9524..736607675396 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -312,17 +312,13 @@ static inline unsigned int erofs_inode_datalayout(unsigned int ifmt) return (ifmt >> EROFS_I_DATALAYOUT_BIT) & EROFS_I_DATALAYOUT_MASK; } -/* - * Different from grab_cache_page_nowait(), reclaiming is never triggered - * when allocating new pages. - */ -static inline -struct page *erofs_grab_cache_page_nowait(struct address_space *mapping, - pgoff_t index) +/* reclaiming is never triggered when allocating new folios. 
*/ +static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, + pgoff_t index) { - return pagecache_get_page(mapping, index, + return __filemap_get_folio(as, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, - readahead_gfp_mask(mapping) & ~__GFP_RECLAIM); + readahead_gfp_mask(as) & ~__GFP_RECLAIM); } /* Has a disk mapping */ @@ -458,8 +454,8 @@ void erofs_shrinker_register(struct super_block *sb); void erofs_shrinker_unregister(struct super_block *sb); int __init erofs_init_shrinker(void); void erofs_exit_shrinker(void); -int __init z_erofs_init_zip_subsystem(void); -void z_erofs_exit_zip_subsystem(void); +int __init z_erofs_init_subsystem(void); +void z_erofs_exit_subsystem(void); int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, @@ -476,37 +472,11 @@ static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} -static inline int z_erofs_init_zip_subsystem(void) { return 0; } -static inline void z_erofs_exit_zip_subsystem(void) {} -static inline int z_erofs_gbuf_init(void) { return 0; } -static inline void z_erofs_gbuf_exit(void) {} +static inline int z_erofs_init_subsystem(void) { return 0; } +static inline void z_erofs_exit_subsystem(void) {} static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif /* !CONFIG_EROFS_FS_ZIP */ -#ifdef CONFIG_EROFS_FS_ZIP_LZMA -int __init z_erofs_lzma_init(void); -void z_erofs_lzma_exit(void); -#else -static inline int z_erofs_lzma_init(void) { return 0; } -static inline int z_erofs_lzma_exit(void) { return 0; } -#endif /* !CONFIG_EROFS_FS_ZIP_LZMA */ - -#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE -int __init z_erofs_deflate_init(void); -void z_erofs_deflate_exit(void); -#else -static inline int z_erofs_deflate_init(void) { return 0; } -static inline int z_erofs_deflate_exit(void) { return 0; } -#endif /* !CONFIG_EROFS_FS_ZIP_DEFLATE */ - -#ifdef CONFIG_EROFS_FS_ZIP_ZSTD -int __init z_erofs_zstd_init(void); -void z_erofs_zstd_exit(void); -#else -static inline int z_erofs_zstd_init(void) { return 0; } -static inline int z_erofs_zstd_exit(void) { return 0; } -#endif /* !CONFIG_EROFS_FS_ZIP_ZSTD */ - #ifdef CONFIG_EROFS_FS_ONDEMAND int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 1b91d9513013..35268263aaed 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -849,23 +849,7 @@ static int __init erofs_module_init(void) if (err) goto shrinker_err; - err = z_erofs_lzma_init(); - if (err) - goto lzma_err; - - err = z_erofs_deflate_init(); - if (err) - goto deflate_err; - - err = z_erofs_zstd_init(); - if (err) - goto zstd_err; - - err = z_erofs_gbuf_init(); - if (err) - goto gbuf_err; - - err = z_erofs_init_zip_subsystem(); + err = z_erofs_init_subsystem(); if (err) goto zip_err; @@ -882,16 +866,8 @@ static int __init erofs_module_init(void) fs_err: erofs_exit_sysfs(); sysfs_err: - z_erofs_exit_zip_subsystem(); + z_erofs_exit_subsystem(); zip_err: - z_erofs_gbuf_exit(); -gbuf_err: - z_erofs_zstd_exit(); -zstd_err: - z_erofs_deflate_exit(); -deflate_err: - z_erofs_lzma_exit(); -lzma_err: erofs_exit_shrinker(); shrinker_err: kmem_cache_destroy(erofs_inode_cachep); @@ -906,13 +882,9 @@ static void 
__exit erofs_module_exit(void) rcu_barrier(); erofs_exit_sysfs(); - z_erofs_exit_zip_subsystem(); - z_erofs_zstd_exit(); - z_erofs_deflate_exit(); - z_erofs_lzma_exit(); + z_erofs_exit_subsystem(); erofs_exit_shrinker(); kmem_cache_destroy(erofs_inode_cachep); - z_erofs_gbuf_exit(); } static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index d6fe002a4a71..424f656cd765 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -19,10 +19,7 @@ typedef void *z_erofs_next_pcluster_t; struct z_erofs_bvec { - union { - struct page *page; - struct folio *folio; - }; + struct page *page; int offset; unsigned int end; }; @@ -449,44 +446,51 @@ static inline int erofs_cpu_hotplug_init(void) { return 0; } static inline void erofs_cpu_hotplug_destroy(void) {} #endif -void z_erofs_exit_zip_subsystem(void) +void z_erofs_exit_subsystem(void) { erofs_cpu_hotplug_destroy(); erofs_destroy_percpu_workers(); destroy_workqueue(z_erofs_workqueue); z_erofs_destroy_pcluster_pool(); + z_erofs_exit_decompressor(); } -int __init z_erofs_init_zip_subsystem(void) +int __init z_erofs_init_subsystem(void) { - int err = z_erofs_create_pcluster_pool(); + int err = z_erofs_init_decompressor(); if (err) - goto out_error_pcluster_pool; + goto err_decompressor; + + err = z_erofs_create_pcluster_pool(); + if (err) + goto err_pcluster_pool; z_erofs_workqueue = alloc_workqueue("erofs_worker", WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus()); if (!z_erofs_workqueue) { err = -ENOMEM; - goto out_error_workqueue_init; + goto err_workqueue_init; } err = erofs_init_percpu_workers(); if (err) - goto out_error_pcpu_worker; + goto err_pcpu_worker; err = erofs_cpu_hotplug_init(); if (err < 0) - goto out_error_cpuhp_init; + goto err_cpuhp_init; return err; -out_error_cpuhp_init: +err_cpuhp_init: erofs_destroy_percpu_workers(); -out_error_pcpu_worker: +err_pcpu_worker: destroy_workqueue(z_erofs_workqueue); -out_error_workqueue_init: +err_workqueue_init: z_erofs_destroy_pcluster_pool(); -out_error_pcluster_pool: +err_pcluster_pool: + z_erofs_exit_decompressor(); +err_decompressor: return err; } @@ -617,32 +621,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } -/* called by erofs_shrinker to get rid of all cached compressed bvecs */ +/* (erofs_shrinker) disconnect cached encoded data with pclusters */ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, struct erofs_workgroup *grp) { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); unsigned int pclusterpages = z_erofs_pclusterpages(pcl); + struct folio *folio; int i; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - /* There is no actice user since the pcluster is now freezed */ + /* Each cached folio contains one page unless bs > ps is supported */ for (i = 0; i < pclusterpages; ++i) { - struct folio *folio = pcl->compressed_bvecs[i].folio; - - if (!folio) - continue; + if (pcl->compressed_bvecs[i].page) { + folio = page_folio(pcl->compressed_bvecs[i].page); + /* Avoid reclaiming or migrating this folio */ + if (!folio_trylock(folio)) + return -EBUSY; - /* Avoid reclaiming or migrating this folio */ - if (!folio_trylock(folio)) - return -EBUSY; - - if (!erofs_folio_is_managed(sbi, folio)) - continue; - pcl->compressed_bvecs[i].folio = NULL; - folio_detach_private(folio); - folio_unlock(folio); + if (!erofs_folio_is_managed(sbi, folio)) + continue; + pcl->compressed_bvecs[i].page = NULL; + 
folio_detach_private(folio); + folio_unlock(folio); + } } return 0; } @@ -650,9 +653,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) { struct z_erofs_pcluster *pcl = folio_get_private(folio); - unsigned int pclusterpages = z_erofs_pclusterpages(pcl); + struct z_erofs_bvec *bvec = pcl->compressed_bvecs; + struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl); bool ret; - int i; if (!folio_test_private(folio)) return true; @@ -661,9 +664,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) spin_lock(&pcl->obj.lockref.lock); if (pcl->obj.lockref.count <= 0) { DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - for (i = 0; i < pclusterpages; ++i) { - if (pcl->compressed_bvecs[i].folio == folio) { - pcl->compressed_bvecs[i].folio = NULL; + for (; bvec < end; ++bvec) { + if (bvec->page && page_folio(bvec->page) == folio) { + bvec->page = NULL; folio_detach_private(folio); ret = true; break; @@ -925,7 +928,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) fe->pcl = NULL; } -static int z_erofs_read_fragment(struct super_block *sb, struct page *page, +static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio, unsigned int cur, unsigned int end, erofs_off_t pos) { struct inode *packed_inode = EROFS_SB(sb)->packed_inode; @@ -938,113 +941,109 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, buf.mapping = packed_inode->i_mapping; for (; cur < end; cur += cnt, pos += cnt) { - cnt = min_t(unsigned int, end - cur, - sb->s_blocksize - erofs_blkoff(sb, pos)); + cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos)); src = erofs_bread(&buf, pos, EROFS_KMAP); if (IS_ERR(src)) { erofs_put_metabuf(&buf); return PTR_ERR(src); } - memcpy_to_page(page, cur, src, cnt); + memcpy_to_folio(folio, cur, src, cnt); } erofs_put_metabuf(&buf); return 0; } -static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe, +static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f, struct folio *folio, bool ra) { - struct inode *const inode = fe->inode; - struct erofs_map_blocks *const map = &fe->map; + struct inode *const inode = f->inode; + struct erofs_map_blocks *const map = &f->map; const loff_t offset = folio_pos(folio); - const unsigned int bs = i_blocksize(inode), fs = folio_size(folio); - bool tight = true, exclusive; - unsigned int cur, end, len, split; + const unsigned int bs = i_blocksize(inode); + unsigned int end = folio_size(folio), split = 0, cur, pgs; + bool tight, excl; int err = 0; + tight = (bs == PAGE_SIZE); z_erofs_onlinefolio_init(folio); - split = 0; - end = fs; -repeat: - if (offset + end - 1 < map->m_la || - offset + end - 1 >= map->m_la + map->m_llen) { - z_erofs_pcluster_end(fe); - map->m_la = offset + end - 1; - map->m_llen = 0; - err = z_erofs_map_blocks_iter(inode, map, 0); - if (err) - goto out; - } - - cur = offset > map->m_la ? 
0 : map->m_la - offset; - /* bump split parts first to avoid several separate cases */ - ++split; - - if (!(map->m_flags & EROFS_MAP_MAPPED)) { - folio_zero_segment(folio, cur, end); - tight = false; - goto next_part; - } - - if (map->m_flags & EROFS_MAP_FRAGMENT) { - erofs_off_t fpos = offset + cur - map->m_la; + do { + if (offset + end - 1 < map->m_la || + offset + end - 1 >= map->m_la + map->m_llen) { + z_erofs_pcluster_end(f); + map->m_la = offset + end - 1; + map->m_llen = 0; + err = z_erofs_map_blocks_iter(inode, map, 0); + if (err) + break; + } - len = min_t(unsigned int, map->m_llen - fpos, end - cur); - err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur, - cur + len, EROFS_I(inode)->z_fragmentoff + fpos); - if (err) - goto out; - tight = false; - goto next_part; - } + cur = offset > map->m_la ? 0 : map->m_la - offset; + pgs = round_down(cur, PAGE_SIZE); + /* bump split parts first to avoid several separate cases */ + ++split; + + if (!(map->m_flags & EROFS_MAP_MAPPED)) { + folio_zero_segment(folio, cur, end); + tight = false; + } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + erofs_off_t fpos = offset + cur - map->m_la; + + err = z_erofs_read_fragment(inode->i_sb, folio, cur, + cur + min(map->m_llen - fpos, end - cur), + EROFS_I(inode)->z_fragmentoff + fpos); + if (err) + break; + tight = false; + } else { + if (!f->pcl) { + err = z_erofs_pcluster_begin(f); + if (err) + break; + f->pcl->besteffort |= !ra; + } - if (!fe->pcl) { - err = z_erofs_pcluster_begin(fe); - if (err) - goto out; - fe->pcl->besteffort |= !ra; - } + pgs = round_down(end - 1, PAGE_SIZE); + /* + * Ensure this partial page belongs to this submit chain + * rather than other concurrent submit chains or + * noio(bypass) chains since those chains are handled + * asynchronously thus it cannot be used for inplace I/O + * or bvpage (should be processed in the strict order.) + */ + tight &= (f->mode >= Z_EROFS_PCLUSTER_FOLLOWED); + excl = false; + if (cur <= pgs) { + excl = (split <= 1) || tight; + cur = pgs; + } - /* - * Ensure the current partial folio belongs to this submit chain rather - * than other concurrent submit chains or the noio(bypass) chain since - * those chains are handled asynchronously thus the folio cannot be used - * for inplace I/O or bvpage (should be processed in a strict order.) 
- */ - tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - exclusive = (!cur && ((split <= 1) || (tight && bs == fs))); - if (cur) - tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); - - err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { - .page = &folio->page, - .offset = offset - map->m_la, - .end = end, - }), exclusive); - if (err) - goto out; - - z_erofs_onlinefolio_split(folio); - if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - fe->pcl->multibases = true; - if (fe->pcl->length < offset + end - map->m_la) { - fe->pcl->length = offset + end - map->m_la; - fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK; - } - if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && - !(map->m_flags & EROFS_MAP_PARTIAL_REF) && - fe->pcl->length == map->m_llen) - fe->pcl->partial = false; -next_part: - /* shorten the remaining extent to update progress */ - map->m_llen = offset + cur - map->m_la; - map->m_flags &= ~EROFS_MAP_FULL_MAPPED; - - end = cur; - if (end > 0) - goto repeat; + err = z_erofs_attach_page(f, &((struct z_erofs_bvec) { + .page = folio_page(folio, pgs >> PAGE_SHIFT), + .offset = offset + pgs - map->m_la, + .end = end - pgs, }), excl); + if (err) + break; -out: + z_erofs_onlinefolio_split(folio); + if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) + f->pcl->multibases = true; + if (f->pcl->length < offset + end - map->m_la) { + f->pcl->length = offset + end - map->m_la; + f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; + } + if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && + f->pcl->length == map->m_llen) + f->pcl->partial = false; + } + /* shorten the remaining extent to update progress */ + map->m_llen = offset + cur - map->m_la; + map->m_flags &= ~EROFS_MAP_FULL_MAPPED; + if (cur <= pgs) { + split = cur < pgs; + tight = (bs == PAGE_SIZE); + } + } while ((end = cur) > 0); z_erofs_onlinefolio_end(folio, err); return err; } @@ -1066,7 +1065,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi, static bool z_erofs_page_is_invalidated(struct page *page) { - return !page->mapping && !z_erofs_is_shortlived_page(page); + return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page); } struct z_erofs_decompress_backend { @@ -1221,8 +1220,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, struct z_erofs_pcluster *pcl = be->pcl; unsigned int pclusterpages = z_erofs_pclusterpages(pcl); const struct z_erofs_decompressor *decomp = - &erofs_decompressors[pcl->algorithmformat]; - int i, err2; + z_erofs_decomp[pcl->algorithmformat]; + int i, j, jtop, err2; struct page *page; bool overlapped; @@ -1280,10 +1279,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); put_page(page); } else { + /* managed folios are still left in compressed_bvecs[] */ for (i = 0; i < pclusterpages; ++i) { - /* consider shortlived pages added when decompressing */ page = be->compressed_pages[i]; - if (!page || erofs_folio_is_managed(sbi, page_folio(page))) continue; @@ -1294,21 +1292,31 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) kvfree(be->compressed_pages); - z_erofs_fill_other_copies(be, err); + jtop = 0; + z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { page = be->decompressed_pages[i]; if (!page) continue; DBG_BUGON(z_erofs_page_is_invalidated(page)); - - /* 
recycle all individual short-lived pages */ - if (z_erofs_put_shortlivedpage(be->pagepool, page)) + if (!z_erofs_is_shortlived_page(page)) { + z_erofs_onlinefolio_end(page_folio(page), err); continue; - z_erofs_onlinefolio_end(page_folio(page), err); + } + if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { + erofs_pagepool_add(be->pagepool, page); + continue; + } + for (j = 0; j < jtop && be->decompressed_pages[j] != page; ++j) + ; + if (j >= jtop) /* this bounce page is newly detected */ + be->decompressed_pages[jtop++] = page; } - + while (jtop) + erofs_pagepool_add(be->pagepool, + be->decompressed_pages[--jtop]); if (be->decompressed_pages != be->onstack_pages) kvfree(be->decompressed_pages); @@ -1419,7 +1427,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, bool tocache = false; struct z_erofs_bvec zbv; struct address_space *mapping; - struct page *page; + struct folio *folio; int bs = i_blocksize(f->inode); /* Except for inplace folios, the entire folio can be used for I/Os */ @@ -1429,23 +1437,25 @@ repeat: spin_lock(&pcl->obj.lockref.lock); zbv = pcl->compressed_bvecs[nr]; spin_unlock(&pcl->obj.lockref.lock); - if (!zbv.folio) + if (!zbv.page) goto out_allocfolio; - bvec->bv_page = &zbv.folio->page; + bvec->bv_page = zbv.page; DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); + + folio = page_folio(zbv.page); /* * Handle preallocated cached folios. We tried to allocate such folios * without triggering direct reclaim. If allocation failed, inplace * file-backed folios will be used instead. */ - if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { - zbv.folio->private = 0; + if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { + folio->private = 0; tocache = true; goto out_tocache; } - mapping = READ_ONCE(zbv.folio->mapping); + mapping = READ_ONCE(folio->mapping); /* * File-backed folios for inplace I/Os are all locked steady, * therefore it is impossible for `mapping` to be NULL. @@ -1457,21 +1467,21 @@ repeat: return; } - folio_lock(zbv.folio); - if (zbv.folio->mapping == mc) { + folio_lock(folio); + if (folio->mapping == mc) { /* * The cached folio is still in managed cache but without * a valid `->private` pcluster hint. Let's reconnect them. */ - if (!folio_test_private(zbv.folio)) { - folio_attach_private(zbv.folio, pcl); + if (!folio_test_private(folio)) { + folio_attach_private(folio, pcl); /* compressed_bvecs[] already takes a ref before */ - folio_put(zbv.folio); + folio_put(folio); } /* no need to submit if it is already up-to-date */ - if (folio_test_uptodate(zbv.folio)) { - folio_unlock(zbv.folio); + if (folio_test_uptodate(folio)) { + folio_unlock(folio); bvec->bv_page = NULL; } return; @@ -1481,32 +1491,31 @@ repeat: * It has been truncated, so it's unsafe to reuse this one. Let's * allocate a new page for compressed data. 
*/ - DBG_BUGON(zbv.folio->mapping); + DBG_BUGON(folio->mapping); tocache = true; - folio_unlock(zbv.folio); - folio_put(zbv.folio); + folio_unlock(folio); + folio_put(folio); out_allocfolio: - page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); + zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); spin_lock(&pcl->obj.lockref.lock); - if (pcl->compressed_bvecs[nr].folio) { - erofs_pagepool_add(&f->pagepool, page); + if (pcl->compressed_bvecs[nr].page) { + erofs_pagepool_add(&f->pagepool, zbv.page); spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } - pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page); + bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page; + folio = page_folio(zbv.page); + /* first mark it as a temporary shortlived folio (now 1 ref) */ + folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; spin_unlock(&pcl->obj.lockref.lock); - bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || - filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) { - /* turn into a temporary shortlived folio (1 ref) */ - zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; + filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) return; - } - folio_attach_private(zbv.folio, pcl); + folio_attach_private(folio, pcl); /* drop a refcount added by allocpage (then 2 refs in total here) */ - folio_put(zbv.folio); + folio_put(folio); } static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, @@ -1767,7 +1776,6 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, end = round_up(end, PAGE_SIZE); } else { end = round_up(map->m_la, PAGE_SIZE); - if (!map->m_llen) return; } @@ -1775,15 +1783,15 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, cur = map->m_la + map->m_llen - 1; while ((cur >= end) && (cur < i_size_read(inode))) { pgoff_t index = cur >> PAGE_SHIFT; - struct page *page; + struct folio *folio; - page = erofs_grab_cache_page_nowait(inode->i_mapping, index); - if (page) { - if (PageUptodate(page)) - unlock_page(page); + folio = erofs_grab_folio_nowait(inode->i_mapping, index); + if (!IS_ERR_OR_NULL(folio)) { + if (folio_test_uptodate(folio)) + folio_unlock(folio); else - z_erofs_scan_folio(f, page_folio(page), !!rac); - put_page(page); + z_erofs_scan_folio(f, folio, !!rac); + folio_put(folio); } if (cur < PAGE_SIZE) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 74d3d7bffcf3..403af6e31d5b 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -686,7 +686,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, struct erofs_inode *const vi = EROFS_I(inode); int err = 0; - trace_z_erofs_map_blocks_iter_enter(inode, map, flags); + trace_erofs_map_blocks_enter(inode, map, flags); /* when trying to read beyond EOF, leave it unmapped */ if (map->m_la >= inode->i_size) { @@ -713,7 +713,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, out: if (err) map->m_llen = 0; - trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); + trace_erofs_map_blocks_exit(inode, map, flags, err); return err; } |