diff options
Diffstat (limited to 'fs/erofs')
-rw-r--r-- | fs/erofs/Kconfig | 16 | ||||
-rw-r--r-- | fs/erofs/Makefile | 1 | ||||
-rw-r--r-- | fs/erofs/compress.h | 10 | ||||
-rw-r--r-- | fs/erofs/data.c | 104 | ||||
-rw-r--r-- | fs/erofs/decompressor.c | 14 | ||||
-rw-r--r-- | fs/erofs/decompressor_crypto.c | 181 | ||||
-rw-r--r-- | fs/erofs/decompressor_deflate.c | 20 | ||||
-rw-r--r-- | fs/erofs/dir.c | 23 | ||||
-rw-r--r-- | fs/erofs/erofs_fs.h | 15 | ||||
-rw-r--r-- | fs/erofs/fileio.c | 24 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 9 | ||||
-rw-r--r-- | fs/erofs/inode.c | 21 | ||||
-rw-r--r-- | fs/erofs/internal.h | 49 | ||||
-rw-r--r-- | fs/erofs/super.c | 88 | ||||
-rw-r--r-- | fs/erofs/sysfs.c | 71 | ||||
-rw-r--r-- | fs/erofs/xattr.c | 56 | ||||
-rw-r--r-- | fs/erofs/xattr.h | 3 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 107 | ||||
-rw-r--r-- | fs/erofs/zmap.c | 159 |
19 files changed, 690 insertions, 281 deletions
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 8f68ec49ad89..7b26efc271ee 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -144,6 +144,22 @@ config EROFS_FS_ZIP_ZSTD If unsure, say N. +config EROFS_FS_ZIP_ACCEL + bool "EROFS hardware decompression support" + depends on EROFS_FS_ZIP + select CRYPTO + select CRYPTO_DEFLATE + help + Saying Y here includes hardware accelerator support for reading + EROFS file systems containing compressed data. It gives better + decompression speed than the software-implemented decompression, and + it costs lower CPU overhead. + + Hardware accelerator support is an experimental feature for now and + file systems are still readable without selecting this option. + + If unsure, say N. + config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support (deprecated)" depends on EROFS_FS diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 4331d53c7109..549abc424763 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -7,5 +7,6 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o +erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 2704d7a592a5..510e922c5193 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -76,4 +76,14 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); int __init z_erofs_init_decompressor(void); void z_erofs_exit_decompressor(void); +int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl); +int z_erofs_crypto_enable_engine(const char *name, int len); +#ifdef CONFIG_EROFS_FS_ZIP_ACCEL +void z_erofs_crypto_disable_all_engines(void); +int z_erofs_crypto_show_engines(char *buf, int size, char sep); +#else +static inline void z_erofs_crypto_disable_all_engines(void) {} +static inline int z_erofs_crypto_show_engines(char *buf, int size, char sep) { return 0; } +#endif #endif diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 2409d2ab0c28..3b1ba571c728 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -27,7 +27,7 @@ void erofs_put_metabuf(struct erofs_buf *buf) void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) { - pgoff_t index = offset >> PAGE_SHIFT; + pgoff_t index = (buf->off + offset) >> PAGE_SHIFT; struct folio *folio = NULL; if (buf->page) { @@ -49,11 +49,19 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) return buf->base + (offset & ~PAGE_MASK); } -void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) +int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb, + bool in_metabox) { struct erofs_sb_info *sbi = EROFS_SB(sb); buf->file = NULL; + if (in_metabox) { + if (unlikely(!sbi->metabox_inode)) + return -EFSCORRUPTED; + buf->mapping = sbi->metabox_inode->i_mapping; + return 0; + } + buf->off = sbi->dif0.fsoff; if (erofs_is_fileio_mode(sbi)) { buf->file = sbi->dif0.file; /* some fs like FUSE needs it */ buf->mapping = buf->file->f_mapping; @@ -61,13 +69,18 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) buf->mapping = sbi->dif0.fscache->inode->i_mapping; else buf->mapping = sb->s_bdev->bd_mapping; + return 0; } void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, bool need_kmap) + erofs_off_t offset, bool in_metabox) { - erofs_init_metabuf(buf, sb); - return erofs_bread(buf, offset, need_kmap); + int err; + + err = erofs_init_metabuf(buf, sb, in_metabox); + if (err) + return ERR_PTR(err); + return erofs_bread(buf, offset, true); } int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) @@ -117,7 +130,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - idx = erofs_read_metabuf(&buf, sb, pos, true); + idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(idx)) { err = PTR_ERR(idx); goto out; @@ -213,9 +226,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* * bit 30: I/O error occurred on this folio + * bit 29: CPU has dirty data in D-cache (needs aliasing handling); * bit 0 - 29: remaining parts to complete this folio */ -#define EROFS_ONLINEFOLIO_EIO (1 << 30) +#define EROFS_ONLINEFOLIO_EIO 30 +#define EROFS_ONLINEFOLIO_DIRTY 29 void erofs_onlinefolio_init(struct folio *folio) { @@ -232,19 +247,23 @@ void erofs_onlinefolio_split(struct folio *folio) atomic_inc((atomic_t *)&folio->private); } -void erofs_onlinefolio_end(struct folio *folio, int err) +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) { int orig, v; do { orig = atomic_read((atomic_t *)&folio->private); - v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + DBG_BUGON(orig <= 0); + v = dirty << EROFS_ONLINEFOLIO_DIRTY; + v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); - if (v & ~EROFS_ONLINEFOLIO_EIO) + if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1)) return; folio->private = 0; - folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); + if (v & BIT(EROFS_ONLINEFOLIO_DIRTY)) + flush_dcache_folio(folio); + folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -257,51 +276,51 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_la = offset; map.m_llen = length; - ret = erofs_map_blocks(inode, &map); if (ret < 0) return ret; - mdev = (struct erofs_map_dev) { - .m_deviceid = map.m_deviceid, - .m_pa = map.m_pa, - }; - ret = erofs_map_dev(sb, &mdev); - if (ret) - return ret; - iomap->offset = map.m_la; - if (flags & IOMAP_DAX) - iomap->dax_dev = mdev.m_dif->dax_dev; - else - iomap->bdev = mdev.m_bdev; iomap->length = map.m_llen; iomap->flags = 0; iomap->private = NULL; - + iomap->addr = IOMAP_NULL_ADDR; if (!(map.m_flags & EROFS_MAP_MAPPED)) { iomap->type = IOMAP_HOLE; - iomap->addr = IOMAP_NULL_ADDR; - if (!iomap->length) - iomap->length = length; return 0; } + if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) { + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(sb, &mdev); + if (ret) + return ret; + + if (flags & IOMAP_DAX) + iomap->dax_dev = mdev.m_dif->dax_dev; + else + iomap->bdev = mdev.m_bdev; + iomap->addr = mdev.m_dif->fsoff + mdev.m_pa; + if (flags & IOMAP_DAX) + iomap->addr += mdev.m_dif->dax_part_off; + } + if (map.m_flags & EROFS_MAP_META) { void *ptr; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; iomap->type = IOMAP_INLINE; - ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true); + ptr = erofs_read_metabuf(&buf, sb, map.m_pa, + erofs_inode_in_metabox(inode)); if (IS_ERR(ptr)) return PTR_ERR(ptr); iomap->inline_data = ptr; iomap->private = buf.base; } else { iomap->type = IOMAP_MAPPED; - iomap->addr = mdev.m_pa; - if (flags & IOMAP_DAX) - iomap->addr += mdev.m_dif->dax_part_off; } return 0; } @@ -350,11 +369,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + trace_erofs_read_folio(folio, true); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) { + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + return iomap_readahead(rac, &erofs_iomap_ops); } @@ -408,20 +432,20 @@ static const struct vm_operations_struct erofs_dax_vm_ops = { .huge_fault = erofs_dax_huge_fault, }; -static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int erofs_file_mmap_prepare(struct vm_area_desc *desc) { - if (!IS_DAX(file_inode(file))) - return generic_file_readonly_mmap(file, vma); + if (!IS_DAX(file_inode(desc->file))) + return generic_file_readonly_mmap_prepare(desc); - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE)) return -EINVAL; - vma->vm_ops = &erofs_dax_vm_ops; - vm_flags_set(vma, VM_HUGEPAGE); + desc->vm_ops = &erofs_dax_vm_ops; + desc->vm_flags |= VM_HUGEPAGE; return 0; } #else -#define erofs_file_mmap generic_file_readonly_mmap +#define erofs_file_mmap_prepare generic_file_readonly_mmap_prepare #endif static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) @@ -451,7 +475,7 @@ static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence) const struct file_operations erofs_file_fops = { .llseek = erofs_file_llseek, .read_iter = erofs_file_read_iter, - .mmap = erofs_file_mmap, + .mmap_prepare = erofs_file_mmap_prepare, .get_unmapped_area = thp_get_unmapped_area, .splice_read = filemap_splice_read, }; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf62e2836b60..354762c9723f 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, cur = min(cur, rq->outputsize); if (cur && rq->out[0]) { kin = kmap_local_page(rq->in[nrpages_in - 1]); - if (rq->out[0] == rq->in[nrpages_in - 1]) { + if (rq->out[0] == rq->in[nrpages_in - 1]) memmove(kin + rq->pageofs_out, kin + pi, cur); - flush_dcache_page(rq->out[0]); - } else { + else memcpy_to_page(rq->out[0], rq->pageofs_out, kin + pi, cur); - } kunmap_local(kin); } rq->outputsize -= cur; @@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK; DBG_BUGON(no >= nrpages_out); cnt = min(insz - pi, PAGE_SIZE - po); - if (rq->out[no] == rq->in[ni]) { + if (rq->out[no] == rq->in[ni]) memmove(kin + po, kin + rq->pageofs_in + pi, cnt); - flush_dcache_page(rq->out[no]); - } else if (rq->out[no]) { + else if (rq->out[no]) memcpy_to_page(rq->out[no], po, kin + rq->pageofs_in + pi, cnt); - } pi += cnt; } while (pi < insz); kunmap_local(kin); @@ -471,7 +467,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) return -EOPNOTSUPP; } - erofs_init_metabuf(&buf, sb); + (void)erofs_init_metabuf(&buf, sb, false); offset = EROFS_SUPER_OFFSET + sbi->sb_size; alg = 0; for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { diff --git a/fs/erofs/decompressor_crypto.c b/fs/erofs/decompressor_crypto.c new file mode 100644 index 000000000000..97b77ab64432 --- /dev/null +++ b/fs/erofs/decompressor_crypto.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/scatterlist.h> +#include <crypto/acompress.h> +#include "compress.h" + +static int __z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq, + struct crypto_acomp *tfm) +{ + struct sg_table st_src, st_dst; + struct acomp_req *req; + struct crypto_wait wait; + u8 *headpage; + int ret; + + headpage = kmap_local_page(*rq->in); + ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in, + min_t(unsigned int, rq->inputsize, + rq->sb->s_blocksize - rq->pageofs_in)); + kunmap_local(headpage); + if (ret) + return ret; + + req = acomp_request_alloc(tfm); + if (!req) + return -ENOMEM; + + ret = sg_alloc_table_from_pages_segment(&st_src, rq->in, rq->inpages, + rq->pageofs_in, rq->inputsize, UINT_MAX, GFP_KERNEL); + if (ret < 0) + goto failed_src_alloc; + + ret = sg_alloc_table_from_pages_segment(&st_dst, rq->out, rq->outpages, + rq->pageofs_out, rq->outputsize, UINT_MAX, GFP_KERNEL); + if (ret < 0) + goto failed_dst_alloc; + + acomp_request_set_params(req, st_src.sgl, + st_dst.sgl, rq->inputsize, rq->outputsize); + + crypto_init_wait(&wait); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait); + if (ret) { + erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", + ret, rq->inputsize, rq->pageofs_in, rq->outputsize); + ret = -EIO; + } + + sg_free_table(&st_dst); +failed_dst_alloc: + sg_free_table(&st_src); +failed_src_alloc: + acomp_request_free(req); + return ret; +} + +struct z_erofs_crypto_engine { + char *crypto_name; + struct crypto_acomp *tfm; +}; + +struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = { + [Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) { + {}, + }, + [Z_EROFS_COMPRESSION_LZMA] = (struct z_erofs_crypto_engine[]) { + {}, + }, + [Z_EROFS_COMPRESSION_DEFLATE] = (struct z_erofs_crypto_engine[]) { + { .crypto_name = "qat_deflate", }, + {}, + }, + [Z_EROFS_COMPRESSION_ZSTD] = (struct z_erofs_crypto_engine[]) { + {}, + }, +}; +static DECLARE_RWSEM(z_erofs_crypto_rwsem); + +static struct crypto_acomp *z_erofs_crypto_get_engine(int alg) +{ + struct z_erofs_crypto_engine *e; + + for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) + if (e->tfm) + return e->tfm; + return NULL; +} + +int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) +{ + struct crypto_acomp *tfm; + int i, err; + + down_read(&z_erofs_crypto_rwsem); + tfm = z_erofs_crypto_get_engine(rq->alg); + if (!tfm) { + err = -EOPNOTSUPP; + goto out; + } + + for (i = 0; i < rq->outpages; i++) { + struct page *const page = rq->out[i]; + struct page *victim; + + if (!page) { + victim = __erofs_allocpage(pgpl, rq->gfp, true); + if (!victim) { + err = -ENOMEM; + goto out; + } + set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); + rq->out[i] = victim; + } + } + err = __z_erofs_crypto_decompress(rq, tfm); +out: + up_read(&z_erofs_crypto_rwsem); + return err; +} + +int z_erofs_crypto_enable_engine(const char *name, int len) +{ + struct z_erofs_crypto_engine *e; + struct crypto_acomp *tfm; + int alg; + + down_write(&z_erofs_crypto_rwsem); + for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) { + for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) { + if (!strncmp(name, e->crypto_name, len)) { + if (e->tfm) + break; + tfm = crypto_alloc_acomp(e->crypto_name, 0, 0); + if (IS_ERR(tfm)) { + up_write(&z_erofs_crypto_rwsem); + return -EOPNOTSUPP; + } + e->tfm = tfm; + break; + } + } + } + up_write(&z_erofs_crypto_rwsem); + return 0; +} + +void z_erofs_crypto_disable_all_engines(void) +{ + struct z_erofs_crypto_engine *e; + int alg; + + down_write(&z_erofs_crypto_rwsem); + for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) { + for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) { + if (!e->tfm) + continue; + crypto_free_acomp(e->tfm); + e->tfm = NULL; + } + } + up_write(&z_erofs_crypto_rwsem); +} + +int z_erofs_crypto_show_engines(char *buf, int size, char sep) +{ + struct z_erofs_crypto_engine *e; + int alg, len = 0; + + for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) { + for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) { + if (!e->tfm) + continue; + len += scnprintf(buf + len, size - len, "%s%c", + e->crypto_name, sep); + } + } + return len; +} diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index c6908a487054..6909b2d529c7 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -97,8 +97,8 @@ failed: return -ENOMEM; } -static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pgpl) +static int __z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) { struct super_block *sb = rq->sb; struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; @@ -178,6 +178,22 @@ failed_zinit: return err; } +static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, + struct page **pgpl) +{ +#ifdef CONFIG_EROFS_FS_ZIP_ACCEL + int err; + + if (!rq->partial_decoding) { + err = z_erofs_crypto_decompress(rq, pgpl); + if (err != -EOPNOTSUPP) + return err; + + } +#endif + return __z_erofs_deflate_decompress(rq, pgpl); +} + const struct z_erofs_decompressor z_erofs_deflate_decomp = { .config = z_erofs_load_deflate_config, .decompress = z_erofs_deflate_decompress, diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2fae209d0274..debf469ad6bd 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -34,7 +34,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, } if (!dir_emit(ctx, de_name, de_namelen, - le64_to_cpu(de->nid), d_type)) + erofs_nid_to_ino64(EROFS_SB(dir->i_sb), + le64_to_cpu(de->nid)), d_type)) return 1; ++de; ctx->pos += sizeof(struct erofs_dirent); @@ -47,8 +48,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct inode *dir = file_inode(f); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; + struct file_ra_state *ra = &f->f_ra; unsigned long bsz = sb->s_blocksize; unsigned int ofs = erofs_blkoff(sb, ctx->pos); + pgoff_t ra_pages = DIV_ROUND_UP_POW2( + EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE); + pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE); int err = 0; bool initial = true; @@ -58,6 +63,21 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + /* readahead blocks to enhance performance for large directories */ + if (ra_pages) { + pgoff_t idx = DIV_ROUND_UP_POW2(ctx->pos, PAGE_SIZE); + pgoff_t pages = min(nr_pages - idx, ra_pages); + + if (pages > 1 && !ra_has_index(ra, idx)) + page_cache_sync_readahead(dir->i_mapping, ra, + f, idx, pages); + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", @@ -88,6 +108,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; ctx->pos = dbstart + maxsize; ofs = 0; + cond_resched(); } erofs_put_metabuf(&buf); if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 767fb4acdc93..377ee12b8b96 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -15,6 +15,7 @@ #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 #define EROFS_FEATURE_COMPAT_MTIME 0x00000002 #define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 +#define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should @@ -31,8 +32,9 @@ #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 #define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 #define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080 +#define EROFS_FEATURE_INCOMPAT_METABOX 0x00000100 #define EROFS_ALL_FEATURE_INCOMPAT \ - ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1) + ((EROFS_FEATURE_INCOMPAT_METABOX << 1) - 1) #define EROFS_SB_EXTSLOT_SIZE 16 @@ -46,7 +48,7 @@ struct erofs_deviceslot { }; #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) -/* erofs on-disk super block (currently 128 bytes) */ +/* erofs on-disk super block (currently 144 bytes at maximum) */ struct erofs_super_block { __le32 magic; /* file system magic number */ __le32 checksum; /* crc32c to avoid unexpected on-disk overlap */ @@ -82,7 +84,9 @@ struct erofs_super_block { __u8 reserved[3]; __le32 build_time; /* seconds added to epoch for mkfs time */ __le64 rootnid_8b; /* (48BIT on) nid of root directory */ - __u8 reserved2[8]; + __le64 reserved2; + __le64 metabox_nid; /* (METABOX on) nid of the metabox inode */ + __le64 reserved3; /* [align to extslot 1] */ }; /* @@ -267,6 +271,9 @@ struct erofs_inode_chunk_index { __le32 startblk_lo; /* starting block number of this chunk */ }; +#define EROFS_DIRENT_NID_METABOX_BIT 63 +#define EROFS_DIRENT_NID_MASK (BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT) - 1) + /* dirent sorts in alphabet order, thus we can do binary search */ struct erofs_dirent { __le64 nid; /* node number */ @@ -434,7 +441,7 @@ static inline void erofs_check_ondisk_layout_definitions(void) .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT }; - BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 144); BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 60c7cc4c105c..b7b3432a9882 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) } else { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret); + erofs_onlinefolio_end(fi.folio, ret, false); } } bio_uninit(&rq->bio); @@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { + const struct cred *old_cred; struct iov_iter iter; int ret; @@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); + old_cred = override_creds(rq->iocb.ki_filp->f_cred); ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); + revert_creds(old_cred); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } @@ -93,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) struct erofs_map_blocks *map = &io->map; unsigned int cur = 0, end = folio_size(folio), len, attached = 0; loff_t pos = folio_pos(folio), ofs; - struct iov_iter iter; - struct bio_vec bv; int err = 0; erofs_onlinefolio_init(folio); @@ -114,18 +115,12 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) void *src; src = erofs_read_metabuf(&buf, inode->i_sb, - map->m_pa + ofs, true); + map->m_pa + ofs, erofs_inode_in_metabox(inode)); if (IS_ERR(src)) { err = PTR_ERR(src); break; } - bvec_set_folio(&bv, folio, len, cur); - iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len); - if (copy_to_iter(src, len, &iter) != len) { - erofs_put_metabuf(&buf); - err = -EIO; - break; - } + memcpy_to_folio(folio, cur, src, len); erofs_put_metabuf(&buf); } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, cur + len); @@ -147,7 +142,8 @@ io_retry: if (err) break; io->rq = erofs_fileio_rq_alloc(&io->dev); - io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; + io->rq->bio.bi_iter.bi_sector = + (io->dev.m_dif->fsoff + io->dev.m_pa) >> 9; attached = 0; } if (!bio_add_folio(&io->rq->bio, folio, len, cur)) @@ -158,7 +154,7 @@ io_retry: } cur += len; } - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -180,7 +176,7 @@ static void erofs_fileio_readahead(struct readahead_control *rac) struct folio *folio; int err; - trace_erofs_readpages(inode, readahead_index(rac), + trace_erofs_readahead(inode, readahead_index(rac), readahead_count(rac), true); while ((folio = readahead_folio(rac))) { err = erofs_fileio_scan_folio(&io, folio); diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 9c9129bca346..362acf828279 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) erofs_fscache_req_put(req); } -static void erofs_fscache_req_end_io(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_io *io = priv; struct erofs_fscache_rq *req = io->private; @@ -180,8 +179,7 @@ struct erofs_fscache_bio { struct bio_vec bvecs[BIO_MAX_VECS]; }; -static void erofs_fscache_bio_endio(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_bio *io = priv; @@ -276,7 +274,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req) size_t size = map.m_llen; void *src; - src = erofs_read_metabuf(&buf, sb, map.m_pa, true); + src = erofs_read_metabuf(&buf, sb, map.m_pa, + erofs_inode_in_metabox(inode)); if (IS_ERR(src)) return PTR_ERR(src); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index a0ae0b4f7b01..9a2f59721522 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -29,6 +29,7 @@ static int erofs_read_inode(struct inode *inode) struct super_block *sb = inode->i_sb; erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode)); unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode)); + bool in_mbox = erofs_inode_in_metabox(inode); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_sb_info *sbi = EROFS_SB(sb); erofs_blk_t addrmask = BIT_ULL(48) - 1; @@ -39,10 +40,10 @@ static int erofs_read_inode(struct inode *inode) void *ptr; int err = 0; - ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true); + ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); - erofs_err(sb, "failed to get inode (nid: %llu) page, err %d", + erofs_err(sb, "failed to read inode meta block (nid: %llu): %d", vi->nid, err); goto err_out; } @@ -78,10 +79,10 @@ static int erofs_read_inode(struct inode *inode) memcpy(&copied, dic, gotten); ptr = erofs_read_metabuf(&buf, sb, - erofs_pos(sb, blkaddr + 1), true); + erofs_pos(sb, blkaddr + 1), in_mbox); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); - erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d", + erofs_err(sb, "failed to read inode payload block (nid: %llu): %d", vi->nid, err); goto err_out; } @@ -264,13 +265,13 @@ static int erofs_fill_inode(struct inode *inode) * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down * so that it will fit. */ -static ino_t erofs_squash_ino(erofs_nid_t nid) +static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid) { - ino_t ino = (ino_t)nid; + u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid); if (sizeof(ino_t) < sizeof(erofs_nid_t)) - ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8; - return ino; + ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8; + return (ino_t)ino64; } static int erofs_iget5_eq(struct inode *inode, void *opaque) @@ -282,7 +283,7 @@ static int erofs_iget5_set(struct inode *inode, void *opaque) { const erofs_nid_t nid = *(erofs_nid_t *)opaque; - inode->i_ino = erofs_squash_ino(nid); + inode->i_ino = erofs_squash_ino(inode->i_sb, nid); EROFS_I(inode)->nid = nid; return 0; } @@ -291,7 +292,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) { struct inode *inode; - inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq, + inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq, erofs_iget5_set, &nid); if (!inode) return ERR_PTR(-ENOMEM); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 4ac188d5d894..4ccc5f0ee8df 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -44,7 +44,7 @@ struct erofs_device_info { struct erofs_fscache *fscache; struct file *file; struct dax_device *dax_dev; - u64 dax_part_off; + u64 fsoff, dax_part_off; erofs_blk_t blocks; erofs_blk_t uniaddr; @@ -125,6 +125,7 @@ struct erofs_sb_info { struct erofs_sb_lz4_info lz4; #endif /* CONFIG_EROFS_FS_ZIP */ struct inode *packed_inode; + struct inode *metabox_inode; struct erofs_dev_context *devs; u64 total_blocks; @@ -148,6 +149,7 @@ struct erofs_sb_info { /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; erofs_nid_t packed_nid; + erofs_nid_t metabox_nid; /* used for statfs, f_files - f_favail */ u64 inos; @@ -157,6 +159,7 @@ struct erofs_sb_info { /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ struct completion s_kobj_unregister; + erofs_off_t dir_ra_bytes; /* fscache support */ struct fscache_volume *volume; @@ -199,6 +202,7 @@ enum { struct erofs_buf { struct address_space *mapping; struct file *file; + u64 off; struct page *page; void *base; }; @@ -226,8 +230,27 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) +EROFS_FEATURE_FUNCS(metabox, incompat, INCOMPAT_METABOX) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) +EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX) + +static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid) +{ + if (!erofs_sb_has_metabox(sbi)) + return nid; + + /* + * When metadata compression is enabled, avoid generating excessively + * large inode numbers for metadata-compressed inodes. Shift NIDs in + * the 31-62 bit range left by one and move the metabox flag to bit 31. + * + * Note: on-disk NIDs remain unchanged as they are primarily used for + * compatibility with non-LFS 32-bit applications. + */ + return ((nid << 1) & GENMASK_ULL(63, 32)) | (nid & GENMASK(30, 0)) | + ((nid >> EROFS_DIRENT_NID_METABOX_BIT) << 31); +} /* atomic flag definitions */ #define EROFS_I_EA_INITED_BIT 0 @@ -237,6 +260,9 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) #define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) #define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) +/* default readahead size of directories */ +#define EROFS_DIR_RA_BYTES 16384 + struct erofs_inode { erofs_nid_t nid; @@ -278,12 +304,20 @@ struct erofs_inode { #define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode) +static inline bool erofs_inode_in_metabox(struct inode *inode) +{ + return EROFS_I(inode)->nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT); +} + static inline erofs_off_t erofs_iloc(struct inode *inode) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); + erofs_nid_t nid_lo = EROFS_I(inode)->nid & EROFS_DIRENT_NID_MASK; + if (erofs_inode_in_metabox(inode)) + return nid_lo << sbi->islotbits; return erofs_pos(inode->i_sb, sbi->meta_blkaddr) + - (EROFS_I(inode)->nid << sbi->islotbits); + (nid_lo << sbi->islotbits); } static inline unsigned int erofs_inode_version(unsigned int ifmt) @@ -314,10 +348,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ -#define EROFS_MAP_FRAGMENT 0x0010 +#define __EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 +#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT) + struct erofs_map_blocks { struct erofs_buf buf; @@ -380,16 +416,17 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap); -void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb); +int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb, + bool in_metabox); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, bool need_kmap); + erofs_off_t offset, bool in_metabox); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); -void erofs_onlinefolio_end(struct folio *folio, int err); +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index da6ee7c39290..e1020aa60771 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, struct erofs_deviceslot *dis; struct file *file; - dis = erofs_read_metabuf(buf, sb, *pos, true); + dis = erofs_read_metabuf(buf, sb, *pos, false); if (IS_ERR(dis)) return PTR_ERR(dis); @@ -165,8 +165,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) : bdev_file_open_by_path(dif->path, BLK_OPEN_READ, sb->s_type, NULL); - if (IS_ERR(file)) + if (IS_ERR(file)) { + if (file == ERR_PTR(-ENOTBLK)) + return -EINVAL; return PTR_ERR(file); + } if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), @@ -255,7 +258,7 @@ static int erofs_read_superblock(struct super_block *sb) void *data; int ret; - data = erofs_read_metabuf(&buf, sb, 0, true); + data = erofs_read_metabuf(&buf, sb, 0, false); if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); return PTR_ERR(data); @@ -316,6 +319,14 @@ static int erofs_read_superblock(struct super_block *sb) sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } sbi->packed_nid = le64_to_cpu(dsb->packed_nid); + if (erofs_sb_has_metabox(sbi)) { + if (sbi->sb_size <= offsetof(struct erofs_super_block, + metabox_nid)) + return -EFSCORRUPTED; + sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid); + if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT)) + return -EFSCORRUPTED; /* self-loop detection */ + } sbi->inos = le64_to_cpu(dsb->inos); sbi->epoch = (s64)le64_to_cpu(dsb->epoch); @@ -332,6 +343,8 @@ static int erofs_read_superblock(struct super_block *sb) if (erofs_sb_has_48bit(sbi)) erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!"); + if (erofs_sb_has_metabox(sbi)) + erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!"); if (erofs_is_fscache_mode(sb)) erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!"); out: @@ -356,7 +369,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, - Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, + Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset, }; static const struct constant_table erofs_param_cache_strategy[] = { @@ -383,6 +396,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { fsparam_string("fsid", Opt_fsid), fsparam_string("domain_id", Opt_domain_id), fsparam_flag_no("directio", Opt_directio), + fsparam_u64("fsoffset", Opt_fsoffset), {} }; @@ -506,28 +520,59 @@ static int erofs_fc_parse_param(struct fs_context *fc, errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); #endif break; + case Opt_fsoffset: + sbi->dif0.fsoff = result.uint_64; + break; } return 0; } -static struct inode *erofs_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) +static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { - return erofs_iget(sb, ino); + erofs_nid_t nid = EROFS_I(inode)->nid; + int len = parent ? 6 : 3; + + if (*max_len < len) { + *max_len = len; + return FILEID_INVALID; + } + + fh[0] = (u32)(nid >> 32); + fh[1] = (u32)(nid & 0xffffffff); + fh[2] = inode->i_generation; + + if (parent) { + nid = EROFS_I(parent)->nid; + + fh[3] = (u32)(nid >> 32); + fh[4] = (u32)(nid & 0xffffffff); + fh[5] = parent->i_generation; + } + + *max_len = len; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *erofs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[0] << 32) | fid->raw[1])); } static struct dentry *erofs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[3] << 32) | fid->raw[4])); } static struct dentry *erofs_get_parent(struct dentry *child) @@ -543,7 +588,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) } static const struct export_operations erofs_export_ops = { - .encode_fh = generic_encode_ino32_fh, + .encode_fh = erofs_encode_fh, .fh_to_dentry = erofs_fh_to_dentry, .fh_to_parent = erofs_fh_to_parent, .get_parent = erofs_get_parent, @@ -618,6 +663,14 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) } } + if (sbi->dif0.fsoff) { + if (sbi->dif0.fsoff & (sb->s_blocksize - 1)) + return invalfc(fc, "fsoffset %llu is not aligned to block size %lu", + sbi->dif0.fsoff, sb->s_blocksize); + if (erofs_is_fscache_mode(sb)) + return invalfc(fc, "cannot use fsoffset in fscache mode"); + } + if (test_opt(&sbi->opt, DAX_ALWAYS)) { if (!sbi->dif0.dax_dev) { errorfc(fc, "DAX unsupported by block device. Turning off DAX."); @@ -647,6 +700,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return PTR_ERR(inode); sbi->packed_inode = inode; } + if (erofs_sb_has_metabox(sbi)) { + inode = erofs_iget(sb, sbi->metabox_nid); + if (IS_ERR(inode)) + return PTR_ERR(inode); + sbi->metabox_inode = inode; + } inode = erofs_iget(sb, sbi->root_nid); if (IS_ERR(inode)) @@ -672,6 +731,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES; erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid); return 0; } @@ -802,6 +862,8 @@ static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi) { iput(sbi->packed_inode); sbi->packed_inode = NULL; + iput(sbi->metabox_inode); + sbi->metabox_inode = NULL; #ifdef CONFIG_EROFS_FS_ZIP iput(sbi->managed_cache); sbi->managed_cache = NULL; @@ -947,6 +1009,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) if (sbi->domain_id) seq_printf(seq, ",domain_id=%s", sbi->domain_id); #endif + if (sbi->dif0.fsoff) + seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff); return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index dad4e6c6c155..1e0658a1d95b 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -7,12 +7,14 @@ #include <linux/kobject.h> #include "internal.h" +#include "compress.h" enum { attr_feature, attr_drop_caches, attr_pointer_ui, attr_pointer_bool, + attr_accel, }; enum { @@ -60,12 +62,25 @@ static struct erofs_attr erofs_attr_##_name = { \ EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts); EROFS_ATTR_FUNC(drop_caches, 0200); #endif +#ifdef CONFIG_EROFS_FS_ZIP_ACCEL +EROFS_ATTR_FUNC(accel, 0644); +#endif +EROFS_ATTR_RW_UI(dir_ra_bytes, erofs_sb_info); -static struct attribute *erofs_attrs[] = { +static struct attribute *erofs_sb_attrs[] = { #ifdef CONFIG_EROFS_FS_ZIP ATTR_LIST(sync_decompress), ATTR_LIST(drop_caches), #endif + ATTR_LIST(dir_ra_bytes), + NULL, +}; +ATTRIBUTE_GROUPS(erofs_sb); + +static struct attribute *erofs_attrs[] = { +#ifdef CONFIG_EROFS_FS_ZIP_ACCEL + ATTR_LIST(accel), +#endif NULL, }; ATTRIBUTE_GROUPS(erofs); @@ -82,6 +97,7 @@ EROFS_ATTR_FEATURE(ztailpacking); EROFS_ATTR_FEATURE(fragments); EROFS_ATTR_FEATURE(dedupe); EROFS_ATTR_FEATURE(48bit); +EROFS_ATTR_FEATURE(metabox); static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), @@ -95,6 +111,7 @@ static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(fragments), ATTR_LIST(dedupe), ATTR_LIST(48bit), + ATTR_LIST(metabox), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); @@ -128,12 +145,14 @@ static ssize_t erofs_attr_show(struct kobject *kobj, if (!ptr) return 0; return sysfs_emit(buf, "%d\n", *(bool *)ptr); + case attr_accel: + return z_erofs_crypto_show_engines(buf, PAGE_SIZE, '\n'); } return 0; } static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) + const char *buf, size_t len) { struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, s_kobj); @@ -182,6 +201,19 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr, invalidate_mapping_pages(MNGD_MAPPING(sbi), 0, -1); return len; #endif +#ifdef CONFIG_EROFS_FS_ZIP_ACCEL + case attr_accel: + buf = skip_spaces(buf); + z_erofs_crypto_disable_all_engines(); + while (*buf) { + t = strcspn(buf, "\n"); + ret = z_erofs_crypto_enable_engine(buf, t); + if (ret < 0) + return ret; + buf += buf[t] != '\0' ? t + 1 : t; + } + return len; +#endif } return 0; } @@ -199,12 +231,13 @@ static const struct sysfs_ops erofs_attr_ops = { }; static const struct kobj_type erofs_sb_ktype = { - .default_groups = erofs_groups, + .default_groups = erofs_sb_groups, .sysfs_ops = &erofs_attr_ops, .release = erofs_sb_release, }; static const struct kobj_type erofs_ktype = { + .default_groups = erofs_groups, .sysfs_ops = &erofs_attr_ops, }; @@ -248,6 +281,12 @@ void erofs_unregister_sysfs(struct super_block *sb) } } +void erofs_exit_sysfs(void) +{ + kobject_put(&erofs_feat); + kset_unregister(&erofs_root); +} + int __init erofs_init_sysfs(void) { int ret; @@ -255,24 +294,12 @@ int __init erofs_init_sysfs(void) kobject_set_name(&erofs_root.kobj, "erofs"); erofs_root.kobj.parent = fs_kobj; ret = kset_register(&erofs_root); - if (ret) - goto root_err; - - ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype, - NULL, "features"); - if (ret) - goto feat_err; - return ret; - -feat_err: - kobject_put(&erofs_feat); - kset_unregister(&erofs_root); -root_err: + if (!ret) { + ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype, + NULL, "features"); + if (!ret) + return 0; + erofs_exit_sysfs(); + } return ret; } - -void erofs_exit_sysfs(void) -{ - kobject_put(&erofs_feat); - kset_unregister(&erofs_root); -} diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 9cf84717a92e..eaa9efd766ee 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -72,12 +72,14 @@ static int erofs_init_inode_xattrs(struct inode *inode) ret = -EFSCORRUPTED; goto out_unlock; /* xattr ondisk layout error */ } - ret = -ENOATTR; + ret = -ENODATA; goto out_unlock; } it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, sb); + ret = erofs_init_metabuf(&it.buf, sb, erofs_inode_in_metabox(inode)); + if (ret) + goto out_unlock; it.pos = erofs_iloc(inode) + vi->inode_isize; /* read in shared xattr array (non-atomic, see kmalloc below) */ @@ -266,20 +268,20 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it) (entry.e_name_index & EROFS_XATTR_LONG_PREFIX_MASK); if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count) - return -ENOATTR; + return -ENODATA; if (it->index != pf->prefix->base_index || it->name.len != entry.e_name_len + pf->infix_len) - return -ENOATTR; + return -ENODATA; if (memcmp(it->name.name, pf->prefix->infix, pf->infix_len)) - return -ENOATTR; + return -ENODATA; it->infix_len = pf->infix_len; } else { if (it->index != entry.e_name_index || it->name.len != entry.e_name_len) - return -ENOATTR; + return -ENODATA; it->infix_len = 0; } @@ -295,7 +297,7 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it) entry.e_name_len - processed); if (memcmp(it->name.name + it->infix_len + processed, it->kaddr, slice)) - return -ENOATTR; + return -ENODATA; it->pos += slice; } @@ -323,9 +325,12 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it, sizeof(u32) * vi->xattr_shared_count; if (xattr_header_sz >= vi->xattr_isize) { DBG_BUGON(xattr_header_sz > vi->xattr_isize); - return -ENOATTR; + return -ENODATA; } + ret = erofs_init_metabuf(&it->buf, it->sb, erofs_inode_in_metabox(inode)); + if (ret) + return ret; remaining = vi->xattr_isize - xattr_header_sz; it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz; @@ -347,7 +352,7 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it, ret = erofs_getxattr_foreach(it); else ret = erofs_listxattr_foreach(it); - if ((getxattr && ret != -ENOATTR) || (!getxattr && ret)) + if ((getxattr && ret != -ENODATA) || (!getxattr && ret)) break; it->pos = next_pos; @@ -361,12 +366,17 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it, struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = it->sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - unsigned int i; - int ret = -ENOATTR; + unsigned int i = 0; + int ret; - for (i = 0; i < vi->xattr_shared_count; ++i) { + ret = erofs_init_metabuf(&it->buf, sb, + erofs_sb_has_shared_ea_in_metabox(sbi)); + if (ret) + return ret; + + while (i < vi->xattr_shared_count) { it->pos = erofs_pos(sb, sbi->xattr_blkaddr) + - vi->xattr_shared_xattrs[i] * sizeof(__le32); + vi->xattr_shared_xattrs[i++] * sizeof(__le32); it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); @@ -375,10 +385,10 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it, ret = erofs_getxattr_foreach(it); else ret = erofs_listxattr_foreach(it); - if ((getxattr && ret != -ENOATTR) || (!getxattr && ret)) + if ((getxattr && ret != -ENODATA) || (!getxattr && ret)) break; } - return ret; + return i ? ret : -ENODATA; } int erofs_getxattr(struct inode *inode, int index, const char *name, @@ -403,7 +413,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name, EROFS_XATTR_FILTER_SEED + index); hashbit &= EROFS_XATTR_FILTER_BITS - 1; if (vi->xattr_name_filter & (1U << hashbit)) - return -ENOATTR; + return -ENODATA; } it.index = index; @@ -413,13 +423,12 @@ int erofs_getxattr(struct inode *inode, int index, const char *name, it.sb = inode->i_sb; it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, it.sb); it.buffer = buffer; it.buffer_size = buffer_size; it.buffer_ofs = 0; ret = erofs_xattr_iter_inline(&it, inode, true); - if (ret == -ENOATTR) + if (ret == -ENODATA) ret = erofs_xattr_iter_shared(&it, inode, true); erofs_put_metabuf(&it.buf); return ret ? ret : it.buffer_ofs; @@ -432,23 +441,22 @@ ssize_t erofs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) struct inode *inode = d_inode(dentry); ret = erofs_init_inode_xattrs(inode); - if (ret == -ENOATTR) + if (ret == -ENODATA) return 0; if (ret) return ret; it.sb = dentry->d_sb; it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, it.sb); it.dentry = dentry; it.buffer = buffer; it.buffer_size = buffer_size; it.buffer_ofs = 0; ret = erofs_xattr_iter_inline(&it, inode, false); - if (!ret || ret == -ENOATTR) + if (!ret || ret == -ENODATA) ret = erofs_xattr_iter_shared(&it, inode, false); - if (ret == -ENOATTR) + if (ret == -ENODATA) ret = 0; erofs_put_metabuf(&it.buf); return ret ? ret : it.buffer_ofs; @@ -485,7 +493,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb) if (sbi->packed_inode) buf.mapping = sbi->packed_inode->i_mapping; else - erofs_init_metabuf(&buf, sb); + (void)erofs_init_metabuf(&buf, sb, false); for (i = 0; i < sbi->xattr_prefix_count; i++) { void *ptr = erofs_read_metadata(sb, &buf, &pos, &len); @@ -539,7 +547,7 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu) rc = erofs_getxattr(inode, prefix, "", value, rc); } - if (rc == -ENOATTR) + if (rc == -ENODATA) acl = NULL; else if (rc < 0) acl = ERR_PTR(rc); diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index b246cd0e135e..6317caa8413e 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -10,9 +10,6 @@ #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> -/* Attribute not found */ -#define ENOATTR ENODATA - #ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index b8e6b76c23d5..792f20888a8f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -288,6 +288,7 @@ static struct workqueue_struct *z_erofs_workqueue __read_mostly; #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD static struct kthread_worker __rcu **z_erofs_pcpu_workers; +static atomic_t erofs_percpu_workers_initialized = ATOMIC_INIT(0); static void erofs_destroy_percpu_workers(void) { @@ -333,12 +334,8 @@ static int erofs_init_percpu_workers(void) } return 0; } -#else -static inline void erofs_destroy_percpu_workers(void) {} -static inline int erofs_init_percpu_workers(void) { return 0; } -#endif -#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD) +#ifdef CONFIG_HOTPLUG_CPU static DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock); static enum cpuhp_state erofs_cpuhp_state; @@ -395,17 +392,56 @@ static void erofs_cpu_hotplug_destroy(void) if (erofs_cpuhp_state) cpuhp_remove_state_nocalls(erofs_cpuhp_state); } -#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */ +#else /* !CONFIG_HOTPLUG_CPU */ static inline int erofs_cpu_hotplug_init(void) { return 0; } static inline void erofs_cpu_hotplug_destroy(void) {} -#endif +#endif/* CONFIG_HOTPLUG_CPU */ +static int z_erofs_init_pcpu_workers(struct super_block *sb) +{ + int err; -void z_erofs_exit_subsystem(void) + if (atomic_xchg(&erofs_percpu_workers_initialized, 1)) + return 0; + + err = erofs_init_percpu_workers(); + if (err) { + erofs_err(sb, "per-cpu workers: failed to allocate."); + goto err_init_percpu_workers; + } + + err = erofs_cpu_hotplug_init(); + if (err < 0) { + erofs_err(sb, "per-cpu workers: failed CPU hotplug init."); + goto err_cpuhp_init; + } + erofs_info(sb, "initialized per-cpu workers successfully."); + return err; + +err_cpuhp_init: + erofs_destroy_percpu_workers(); +err_init_percpu_workers: + atomic_set(&erofs_percpu_workers_initialized, 0); + return err; +} + +static void z_erofs_destroy_pcpu_workers(void) { + if (!atomic_xchg(&erofs_percpu_workers_initialized, 0)) + return; erofs_cpu_hotplug_destroy(); erofs_destroy_percpu_workers(); +} +#else /* !CONFIG_EROFS_FS_PCPU_KTHREAD */ +static inline int z_erofs_init_pcpu_workers(struct super_block *sb) { return 0; } +static inline void z_erofs_destroy_pcpu_workers(void) {} +#endif/* CONFIG_EROFS_FS_PCPU_KTHREAD */ + +void z_erofs_exit_subsystem(void) +{ + z_erofs_destroy_pcpu_workers(); destroy_workqueue(z_erofs_workqueue); z_erofs_destroy_pcluster_pool(); + z_erofs_crypto_disable_all_engines(); z_erofs_exit_decompressor(); } @@ -427,19 +463,8 @@ int __init z_erofs_init_subsystem(void) goto err_workqueue_init; } - err = erofs_init_percpu_workers(); - if (err) - goto err_pcpu_worker; - - err = erofs_cpu_hotplug_init(); - if (err < 0) - goto err_cpuhp_init; return err; -err_cpuhp_init: - erofs_destroy_percpu_workers(); -err_pcpu_worker: - destroy_workqueue(z_erofs_workqueue); err_workqueue_init: z_erofs_destroy_pcluster_pool(); err_pcluster_pool: @@ -641,8 +666,14 @@ static const struct address_space_operations z_erofs_cache_aops = { int z_erofs_init_super(struct super_block *sb) { - struct inode *const inode = new_inode(sb); + struct inode *inode; + int err; + + err = z_erofs_init_pcpu_workers(sb); + if (err) + return err; + inode = new_inode(sb); if (!inode) return -ENOMEM; set_nlink(inode, 1); @@ -774,6 +805,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; struct z_erofs_pcluster *pcl = NULL; + void *ptr; int ret; DBG_BUGON(fe->pcl); @@ -823,15 +855,17 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) /* bind cache first when cached decompression is preferred */ z_erofs_bind_cache(fe); } else { - void *mptr; - - mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false); - if (IS_ERR(mptr)) { - ret = PTR_ERR(mptr); - erofs_err(sb, "failed to get inline data %d", ret); + ret = erofs_init_metabuf(&map->buf, sb, + erofs_inode_in_metabox(fe->inode)); + if (ret) + return ret; + ptr = erofs_bread(&map->buf, map->m_pa, false); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + erofs_err(sb, "failed to get inline folio %d", ret); return ret; } - get_page(map->buf.page); + folio_get(page_folio(map->buf.page)); WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; @@ -1003,7 +1037,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, end); tight = false; - } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + } else if (map->m_flags & __EROFS_MAP_FRAGMENT) { erofs_off_t fpos = offset + cur - map->m_la; err = z_erofs_read_fragment(inode->i_sb, folio, cur, @@ -1060,7 +1094,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, tight = (bs == PAGE_SIZE); } } while ((end = cur) > 0); - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -1165,7 +1199,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) cur += len; } kunmap_local(dst); - erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); + erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true); list_del(p); kfree(bvi); } @@ -1294,9 +1328,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) /* must handle all compressed pages before actual file pages */ if (pcl->from_meta) { - page = pcl->compressed_bvecs[0].page; + folio_put(page_folio(pcl->compressed_bvecs[0].page)); WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); - put_page(page); } else { /* managed folios are still left in compressed_bvecs[] */ for (i = 0; i < pclusterpages; ++i) { @@ -1324,7 +1357,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { - erofs_onlinefolio_end(page_folio(page), err); + erofs_onlinefolio_end(page_folio(page), err, true); continue; } if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { @@ -1707,7 +1740,8 @@ drain_io: bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOIO); bio->bi_end_io = z_erofs_endio; - bio->bi_iter.bi_sector = cur >> 9; + bio->bi_iter.bi_sector = + (mdev.m_dif->fsoff + cur) >> 9; bio->bi_private = q[JQ_SUBMIT]; if (readahead) bio->bi_opf |= REQ_RAHEAD; @@ -1855,13 +1889,12 @@ static void z_erofs_readahead(struct readahead_control *rac) { struct inode *const inode = rac->mapping->host; Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac)); - struct folio *head = NULL, *folio; unsigned int nrpages = readahead_count(rac); + struct folio *head = NULL, *folio; int err; + trace_erofs_readahead(inode, readahead_index(rac), nrpages, false); z_erofs_pcluster_readmore(&f, rac, true); - nrpages = readahead_count(rac); - trace_erofs_readpages(inode, readahead_index(rac), nrpages, false); while ((folio = readahead_folio(rac))) { folio->private = head; head = folio; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14ea47f954f5..a93efd95c555 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -17,7 +17,7 @@ struct z_erofs_maprecorder { u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; - bool partialref; + bool partialref, in_mbox; }; static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, @@ -31,7 +31,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, struct z_erofs_lcluster_index *di; unsigned int advise; - di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true); + di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; @@ -146,7 +146,7 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; - in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true); + in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(in)) return PTR_ERR(in); @@ -240,6 +240,13 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { + erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", + m->type, lcn, EROFS_I(m->inode)->nid); + DBG_BUGON(1); + return -EOPNOTSUPP; + } + switch (EROFS_I(m->inode)->datalayout) { case EROFS_INODE_COMPRESSED_FULL: return z_erofs_load_full_lcluster(m, lcn); @@ -265,12 +272,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, if (err) return err; - if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { - erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; - } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; if (!lookback_distance) break; @@ -325,25 +327,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); - if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { - if (m->delta[0] != 1) { - erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } - if (m->compressedblks) - goto out; - } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { - /* - * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type - * rather than CBLKCNT, it's a 1 block-sized pcluster. - */ - m->compressedblks = 1; - goto out; + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) { + erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } - erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; + + /* + * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather + * than CBLKCNT, it's a 1 block-sized pcluster. + */ + if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks) + m->compressedblks = 1; out: m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; @@ -379,11 +374,6 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) if (lcn != headlcn) break; /* ends at the next HEAD lcluster */ m->delta[1] = 1; - } else { - erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } lcn += m->delta[1]; } @@ -402,6 +392,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, struct z_erofs_maprecorder m = { .inode = inode, .map = map, + .in_mbox = erofs_inode_in_metabox(inode), }; int err = 0; unsigned int endoff, afmt; @@ -413,8 +404,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; @@ -429,44 +419,33 @@ static int z_erofs_map_blocks_fo(struct inode *inode, map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; - switch (m.type) { - case Z_EROFS_LCLUSTER_TYPE_PLAIN: - case Z_EROFS_LCLUSTER_TYPE_HEAD1: - case Z_EROFS_LCLUSTER_TYPE_HEAD2: - if (endoff >= m.clusterofs) { - m.headtype = m.type; - map->m_la = (m.lcn << lclusterbits) | m.clusterofs; - /* - * For ztailpacking files, in order to inline data more - * effectively, special EOF lclusters are now supported - * which can have three parts at most. - */ - if (ztailpacking && end > inode->i_size) - end = inode->i_size; - break; - } - /* m.lcn should be >= 1 if endoff < m.clusterofs */ - if (!m.lcn) { - erofs_err(sb, "invalid logical cluster 0 at nid %llu", - vi->nid); - err = -EFSCORRUPTED; - goto unmap_out; + if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) { + m.headtype = m.type; + map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + /* + * For ztailpacking files, in order to inline data more + * effectively, special EOF lclusters are now supported + * which can have three parts at most. + */ + if (ztailpacking && end > inode->i_size) + end = inode->i_size; + } else { + if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + /* m.lcn should be >= 1 if endoff < m.clusterofs */ + if (!m.lcn) { + erofs_err(sb, "invalid logical cluster 0 at nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } + end = (m.lcn << lclusterbits) | m.clusterofs; + map->m_flags |= EROFS_MAP_FULL_MAPPED; + m.delta[0] = 1; } - end = (m.lcn << lclusterbits) | m.clusterofs; - map->m_flags |= EROFS_MAP_FULL_MAPPED; - m.delta[0] = 1; - fallthrough; - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; - break; - default: - erofs_err(sb, "unknown type %u @ offset %llu of nid %llu", - m.type, ofs, vi->nid); - err = -EOPNOTSUPP; - goto unmap_out; } if (m.partialref) map->m_flags |= EROFS_MAP_PARTIAL_REF; @@ -489,7 +468,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { - map->m_flags |= EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -543,6 +522,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); + bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; struct z_erofs_extent *ext; @@ -552,7 +532,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, map->m_flags = 0; if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { - ext = erofs_read_metabuf(&map->buf, sb, pos, true); + ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); pa = le64_to_cpu(*(__le64 *)ext); @@ -565,7 +545,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { - ext = erofs_read_metabuf(&map->buf, sb, pos, true); + ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); map->m_plen = le32_to_cpu(ext->plen); @@ -585,7 +565,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, for (l = 0, r = vi->z_extents; l < r; ) { mid = l + (r - l) / 2; ext = erofs_read_metabuf(&map->buf, sb, - pos + mid * recsz, true); + pos + mid * recsz, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); @@ -597,6 +577,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (la > map->m_la) { r = mid; + if (la > lend) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } lend = la; } else { l = mid + 1; @@ -613,7 +597,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; @@ -635,22 +619,15 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } } map->m_llen = lend - map->m_la; - if (!last && map->m_llen < sb->s_blocksize) { - erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", - map->m_llen, map->m_la, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } return 0; } -static int z_erofs_fill_inode_lazy(struct inode *inode) +static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; int err, headnr; erofs_off_t pos; - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct z_erofs_map_header *h; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { @@ -670,7 +647,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); - h = erofs_read_metabuf(&buf, sb, pos, true); + h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; @@ -708,7 +685,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; - goto out_put_metabuf; + goto out_unlock; } if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && @@ -717,7 +694,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto out_put_metabuf; + goto out_unlock; } if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ @@ -725,27 +702,25 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto out_put_metabuf; + goto out_unlock; } if (vi->z_idata_size || (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - struct erofs_map_blocks map = { + struct erofs_map_blocks tm = { .buf = __EROFS_BUF_INITIALIZER }; - err = z_erofs_map_blocks_fo(inode, &map, + err = z_erofs_map_blocks_fo(inode, &tm, EROFS_GET_BLOCKS_FINDTAIL); - erofs_put_metabuf(&map.buf); + erofs_put_metabuf(&tm.buf); if (err < 0) - goto out_put_metabuf; + goto out_unlock; } done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); -out_put_metabuf: - erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; @@ -763,7 +738,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, map->m_la = inode->i_size; map->m_flags = 0; } else { - err = z_erofs_fill_inode_lazy(inode); + err = z_erofs_fill_inode(inode, map); if (!err) { if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) @@ -799,7 +774,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; |