summaryrefslogtreecommitdiff
path: root/drivers/staging/erofs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/erofs')
-rw-r--r--drivers/staging/erofs/Kconfig141
-rw-r--r--drivers/staging/erofs/Makefile13
-rw-r--r--drivers/staging/erofs/TODO45
-rw-r--r--drivers/staging/erofs/data.c385
-rw-r--r--drivers/staging/erofs/dir.c145
-rw-r--r--drivers/staging/erofs/erofs_fs.h266
-rw-r--r--drivers/staging/erofs/include/linux/tagptr.h110
-rw-r--r--drivers/staging/erofs/include/trace/events/erofs.h240
-rw-r--r--drivers/staging/erofs/inode.c283
-rw-r--r--drivers/staging/erofs/internal.h556
-rw-r--r--drivers/staging/erofs/lz4defs.h227
-rw-r--r--drivers/staging/erofs/namei.c251
-rw-r--r--drivers/staging/erofs/super.c649
-rw-r--r--drivers/staging/erofs/unzip_lz4.c251
-rw-r--r--drivers/staging/erofs/unzip_pagevec.h172
-rw-r--r--drivers/staging/erofs/unzip_vle.c1656
-rw-r--r--drivers/staging/erofs/unzip_vle.h239
-rw-r--r--drivers/staging/erofs/unzip_vle_lz4.c209
-rw-r--r--drivers/staging/erofs/utils.c271
-rw-r--r--drivers/staging/erofs/xattr.c577
-rw-r--r--drivers/staging/erofs/xattr.h93
21 files changed, 6779 insertions, 0 deletions
diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig
new file mode 100644
index 000000000000..96f614934df1
--- /dev/null
+++ b/drivers/staging/erofs/Kconfig
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config EROFS_FS
+ tristate "EROFS filesystem support"
+ depends on BROKEN
+ help
+ EROFS(Enhanced Read-Only File System) is a lightweight
+ read-only file system with modern designs (eg. page-sized
+ blocks, inline xattrs/data, etc.) for scenarios which need
+ high-performance read-only requirements, eg. firmwares in
+ mobile phone or LIVECDs.
+
+ It also provides VLE compression support, focusing on
+ random read improvements, keeping relatively lower
+ compression ratios, which is useful for high-performance
+ devices with limited memory and ROM space.
+
+ If unsure, say N.
+
+config EROFS_FS_DEBUG
+ bool "EROFS debugging feature"
+ depends on EROFS_FS
+ help
+ Print EROFS debugging messages and enable more BUG_ONs
+ which check the filesystem consistency aggressively.
+
+ For daily use, say N.
+
+config EROFS_FS_XATTR
+ bool "EROFS extended attributes"
+ depends on EROFS_FS
+ default y
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+config EROFS_FS_POSIX_ACL
+ bool "EROFS Access Control Lists"
+ depends on EROFS_FS_XATTR
+ select FS_POSIX_ACL
+ default y
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
+
+config EROFS_FS_SECURITY
+ bool "EROFS Security Labels"
+ depends on EROFS_FS_XATTR
+ help
+ Security labels provide an access control facility to support Linux
+ Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+ Linux. This option enables an extended attribute handler for file
+ security labels in the erofs filesystem, so that it requires enabling
+ the extended attribute support in advance.
+
+ If you are not using a security module, say N.
+
+config EROFS_FS_USE_VM_MAP_RAM
+ bool "EROFS VM_MAP_RAM Support"
+ depends on EROFS_FS
+ help
+ use vm_map_ram/vm_unmap_ram instead of vmap/vunmap.
+
+ If you don't know what these are, say N.
+
+config EROFS_FAULT_INJECTION
+ bool "EROFS fault injection facility"
+ depends on EROFS_FS
+ help
+ Test EROFS to inject faults such as ENOMEM, EIO, and so on.
+ If unsure, say N.
+
+config EROFS_FS_ZIP
+ bool "EROFS Data Compresssion Support"
+ depends on EROFS_FS
+ help
+ Currently we support VLE Compression only.
+ Play at your own risk.
+
+ If you don't want to use compression feature, say N.
+
+config EROFS_FS_CLUSTER_PAGE_LIMIT
+ int "EROFS Cluster Pages Hard Limit"
+ depends on EROFS_FS_ZIP
+ range 1 256
+ default "1"
+ help
+ Indicates VLE compressed pages hard limit of a
+ compressed cluster.
+
+ For example, if files of a image are compressed
+ into 8k-unit, the hard limit should not be less
+ than 2. Otherwise, the image cannot be mounted
+ correctly on this kernel.
+
+choice
+ prompt "EROFS VLE Data Decompression mode"
+ depends on EROFS_FS_ZIP
+ default EROFS_FS_ZIP_CACHE_BIPOLAR
+ help
+ EROFS supports three options for VLE decompression.
+ "In-place Decompression Only" consumes the minimum memory
+ with lowest random read.
+
+ "Bipolar Cached Decompression" consumes the maximum memory
+ with highest random read.
+
+ If unsure, select "Bipolar Cached Decompression"
+
+config EROFS_FS_ZIP_NO_CACHE
+ bool "In-place Decompression Only"
+ help
+ Read compressed data into page cache and do in-place
+ decompression directly.
+
+config EROFS_FS_ZIP_CACHE_UNIPOLAR
+ bool "Unipolar Cached Decompression"
+ help
+ For each request, it caches the last compressed page
+ for further reading.
+ It still decompresses in place for the rest compressed pages.
+
+config EROFS_FS_ZIP_CACHE_BIPOLAR
+ bool "Bipolar Cached Decompression"
+ help
+ For each request, it caches the both end compressed pages
+ for further reading.
+ It still decompresses in place for the rest compressed pages.
+
+ Recommended for performance priority.
+
+endchoice
+
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
new file mode 100644
index 000000000000..9a766eb7ed75
--- /dev/null
+++ b/drivers/staging/erofs/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+EROFS_VERSION = "1.0pre1"
+
+ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\"
+
+obj-$(CONFIG_EROFS_FS) += erofs.o
+# staging requirement: to be self-contained in its own directory
+ccflags-y += -I$(src)/include
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
+erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_lz4.o unzip_vle_lz4.o
+
diff --git a/drivers/staging/erofs/TODO b/drivers/staging/erofs/TODO
new file mode 100644
index 000000000000..f99ddb842f99
--- /dev/null
+++ b/drivers/staging/erofs/TODO
@@ -0,0 +1,45 @@
+
+EROFS is still working in progress, thus it is not suitable
+for all productive uses. play at your own risk :)
+
+TODO List:
+ - add the missing error handling code
+ (mainly existed in xattr and decompression submodules);
+
+ - finalize erofs ondisk format design (which means that
+ minor on-disk revisions could happen later);
+
+ - documentation and detailed technical analysis;
+
+ - general code review and clean up
+ (including confusing variable names and code snippets);
+
+ - support larger compressed clustersizes for selection
+ (currently erofs only works as expected with the page-sized
+ compressed cluster configuration, usually 4KB);
+
+ - support more lossless data compression algorithms
+ in addition to LZ4 algorithms in VLE approach;
+
+ - data deduplication and other useful features.
+
+erofs-mkfs (preview version) binaries for i386 / x86_64 are available at:
+
+ https://github.com/hsiangkao/erofs_mkfs_binary
+
+It is still in progress opening mkfs source code to public,
+in any case an open-source mkfs will be released in the near future.
+
+
+Code, suggestions, etc, are welcome. Please feel free to
+ask and send patches,
+
+To:
+ linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
+ Gao Xiang <gaoxiang25@huawei.com>
+ Chao Yu <yuchao0@huawei.com>
+
+Cc: (for linux-kernel upstream patches)
+ Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ linux-staging mailing list <devel@driverdev.osuosl.org>
+
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
new file mode 100644
index 000000000000..ac263a180253
--- /dev/null
+++ b/drivers/staging/erofs/data.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/data.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "internal.h"
+#include <linux/prefetch.h>
+
+#include <trace/events/erofs.h>
+
+static inline void read_endio(struct bio *bio)
+{
+ int i;
+ struct bio_vec *bvec;
+ const blk_status_t err = bio->bi_status;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+
+ /* page is already locked */
+ BUG_ON(PageUptodate(page));
+
+ if (unlikely(err))
+ SetPageError(page);
+ else
+ SetPageUptodate(page);
+
+ unlock_page(page);
+ /* page could be reclaimed now */
+ }
+ bio_put(bio);
+}
+
+/* prio -- true is used for dir */
+struct page *erofs_get_meta_page(struct super_block *sb,
+ erofs_blk_t blkaddr, bool prio)
+{
+ struct inode *bd_inode = sb->s_bdev->bd_inode;
+ struct address_space *mapping = bd_inode->i_mapping;
+ struct page *page;
+
+repeat:
+ page = find_or_create_page(mapping, blkaddr,
+ /*
+ * Prefer looping in the allocator rather than here,
+ * at least that code knows what it's doing.
+ */
+ mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL);
+
+ BUG_ON(!page || !PageLocked(page));
+
+ if (!PageUptodate(page)) {
+ struct bio *bio;
+ int err;
+
+ bio = prepare_bio(sb, blkaddr, 1, read_endio);
+ err = bio_add_page(bio, page, PAGE_SIZE, 0);
+ BUG_ON(err != PAGE_SIZE);
+
+ __submit_bio(bio, REQ_OP_READ,
+ REQ_META | (prio ? REQ_PRIO : 0));
+
+ lock_page(page);
+
+ /* the page has been truncated by others? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ put_page(page);
+ goto repeat;
+ }
+
+ /* more likely a read error */
+ if (unlikely(!PageUptodate(page))) {
+ unlock_page(page);
+ put_page(page);
+
+ page = ERR_PTR(-EIO);
+ }
+ }
+ return page;
+}
+
+static int erofs_map_blocks_flatmode(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ erofs_blk_t nblocks, lastblk;
+ u64 offset = map->m_la;
+ struct erofs_vnode *vi = EROFS_V(inode);
+
+ trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
+ BUG_ON(is_inode_layout_compression(inode));
+
+ nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+ lastblk = nblocks - is_inode_layout_inline(inode);
+
+ if (unlikely(offset >= inode->i_size)) {
+ /* leave out-of-bound access unmapped */
+ map->m_flags = 0;
+ map->m_plen = 0;
+ goto out;
+ }
+
+ /* there is no hole in flatmode */
+ map->m_flags = EROFS_MAP_MAPPED;
+
+ if (offset < blknr_to_addr(lastblk)) {
+ map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
+ map->m_plen = blknr_to_addr(lastblk) - offset;
+ } else if (is_inode_layout_inline(inode)) {
+ /* 2 - inode inline B: inode, [xattrs], inline last blk... */
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+ map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
+ vi->xattr_isize + erofs_blkoff(map->m_la);
+ map->m_plen = inode->i_size - offset;
+
+ /* inline data should locate in one meta block */
+ BUG_ON(erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE);
+ map->m_flags |= EROFS_MAP_META;
+ } else {
+ errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
+ vi->nid, inode->i_size, map->m_la);
+ BUG();
+ }
+
+out:
+ map->m_llen = map->m_plen;
+ trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
+ return 0;
+}
+
+#ifdef CONFIG_EROFS_FS_ZIP
+extern int z_erofs_map_blocks_iter(struct inode *,
+ struct erofs_map_blocks *, struct page **, int);
+#endif
+
+int erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ struct page **mpage_ret, int flags)
+{
+ /* by default, reading raw data never use erofs_map_blocks_iter */
+ if (unlikely(!is_inode_layout_compression(inode))) {
+ if (*mpage_ret != NULL)
+ put_page(*mpage_ret);
+ *mpage_ret = NULL;
+
+ return erofs_map_blocks(inode, map, flags);
+ }
+
+#ifdef CONFIG_EROFS_FS_ZIP
+ return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags);
+#else
+ /* data compression is not available */
+ return -ENOTSUPP;
+#endif
+}
+
+int erofs_map_blocks(struct inode *inode,
+ struct erofs_map_blocks *map, int flags)
+{
+ if (unlikely(is_inode_layout_compression(inode))) {
+ struct page *mpage = NULL;
+ int err;
+
+ err = erofs_map_blocks_iter(inode, map, &mpage, flags);
+ if (mpage != NULL)
+ put_page(mpage);
+ return err;
+ }
+ return erofs_map_blocks_flatmode(inode, map, flags);
+}
+
+static inline struct bio *erofs_read_raw_page(
+ struct bio *bio,
+ struct address_space *mapping,
+ struct page *page,
+ erofs_off_t *last_block,
+ unsigned nblocks,
+ bool ra)
+{
+ struct inode *inode = mapping->host;
+ erofs_off_t current_block = (erofs_off_t)page->index;
+ int err;
+
+ BUG_ON(!nblocks);
+
+ if (PageUptodate(page)) {
+ err = 0;
+ goto has_updated;
+ }
+
+ if (cleancache_get_page(page) == 0) {
+ err = 0;
+ SetPageUptodate(page);
+ goto has_updated;
+ }
+
+ /* note that for readpage case, bio also equals to NULL */
+ if (bio != NULL &&
+ /* not continuous */
+ *last_block + 1 != current_block) {
+submit_bio_retry:
+ __submit_bio(bio, REQ_OP_READ, 0);
+ bio = NULL;
+ }
+
+ if (bio == NULL) {
+ struct erofs_map_blocks map = {
+ .m_la = blknr_to_addr(current_block),
+ };
+ erofs_blk_t blknr;
+ unsigned blkoff;
+
+ err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+ if (unlikely(err))
+ goto err_out;
+
+ /* zero out the holed page */
+ if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
+ zero_user_segment(page, 0, PAGE_SIZE);
+ SetPageUptodate(page);
+
+ /* imply err = 0, see erofs_map_blocks */
+ goto has_updated;
+ }
+
+ /* for RAW access mode, m_plen must be equal to m_llen */
+ BUG_ON(map.m_plen != map.m_llen);
+
+ blknr = erofs_blknr(map.m_pa);
+ blkoff = erofs_blkoff(map.m_pa);
+
+ /* deal with inline page */
+ if (map.m_flags & EROFS_MAP_META) {
+ void *vsrc, *vto;
+ struct page *ipage;
+
+ BUG_ON(map.m_plen > PAGE_SIZE);
+
+ ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
+
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto err_out;
+ }
+
+ vsrc = kmap_atomic(ipage);
+ vto = kmap_atomic(page);
+ memcpy(vto, vsrc + blkoff, map.m_plen);
+ memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
+ kunmap_atomic(vto);
+ kunmap_atomic(vsrc);
+ flush_dcache_page(page);
+
+ SetPageUptodate(page);
+ /* TODO: could we unlock the page earlier? */
+ unlock_page(ipage);
+ put_page(ipage);
+
+ /* imply err = 0, see erofs_map_blocks */
+ goto has_updated;
+ }
+
+ /* pa must be block-aligned for raw reading */
+ BUG_ON(erofs_blkoff(map.m_pa) != 0);
+
+ /* max # of continuous pages */
+ if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
+ nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
+ if (nblocks > BIO_MAX_PAGES)
+ nblocks = BIO_MAX_PAGES;
+
+ bio = prepare_bio(inode->i_sb, blknr, nblocks, read_endio);
+ }
+
+ err = bio_add_page(bio, page, PAGE_SIZE, 0);
+ /* out of the extent or bio is full */
+ if (err < PAGE_SIZE)
+ goto submit_bio_retry;
+
+ *last_block = current_block;
+
+ /* shift in advance in case of it followed by too many gaps */
+ if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) {
+ /* err should reassign to 0 after submitting */
+ err = 0;
+ goto submit_bio_out;
+ }
+
+ return bio;
+
+err_out:
+ /* for sync reading, set page error immediately */
+ if (!ra) {
+ SetPageError(page);
+ ClearPageUptodate(page);
+ }
+has_updated:
+ unlock_page(page);
+
+ /* if updated manually, continuous pages has a gap */
+ if (bio != NULL)
+submit_bio_out:
+ __submit_bio(bio, REQ_OP_READ, 0);
+
+ return unlikely(err) ? ERR_PTR(err) : NULL;
+}
+
+/*
+ * since we dont have write or truncate flows, so no inode
+ * locking needs to be held at the moment.
+ */
+static int erofs_raw_access_readpage(struct file *file, struct page *page)
+{
+ erofs_off_t last_block;
+ struct bio *bio;
+
+ trace_erofs_readpage(page, true);
+
+ bio = erofs_read_raw_page(NULL, page->mapping,
+ page, &last_block, 1, false);
+
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ BUG_ON(bio != NULL); /* since we have only one bio -- must be NULL */
+ return 0;
+}
+
+static int erofs_raw_access_readpages(struct file *filp,
+ struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ erofs_off_t last_block;
+ struct bio *bio = NULL;
+ gfp_t gfp = readahead_gfp_mask(mapping);
+ struct page *page = list_last_entry(pages, struct page, lru);
+
+ trace_erofs_readpages(mapping->host, page, nr_pages, true);
+
+ for (; nr_pages; --nr_pages) {
+ page = list_entry(pages->prev, struct page, lru);
+
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+
+ if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+ bio = erofs_read_raw_page(bio, mapping, page,
+ &last_block, nr_pages, true);
+
+ /* all the page errors are ignored when readahead */
+ if (IS_ERR(bio)) {
+ pr_err("%s, readahead error at page %lu of nid %llu\n",
+ __func__, page->index,
+ EROFS_V(mapping->host)->nid);
+
+ bio = NULL;
+ }
+ }
+
+ /* pages could still be locked */
+ put_page(page);
+ }
+ BUG_ON(!list_empty(pages));
+
+ /* the rare case (end in gaps) */
+ if (unlikely(bio != NULL))
+ __submit_bio(bio, REQ_OP_READ, 0);
+ return 0;
+}
+
+/* for uncompressed (aligned) files and raw access for other files */
+const struct address_space_operations erofs_raw_access_aops = {
+ .readpage = erofs_raw_access_readpage,
+ .readpages = erofs_raw_access_readpages,
+};
+
diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c
new file mode 100644
index 000000000000..be6ae3b1bdbe
--- /dev/null
+++ b/drivers/staging/erofs/dir.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/dir.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "internal.h"
+
+static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = {
+ [EROFS_FT_UNKNOWN] = DT_UNKNOWN,
+ [EROFS_FT_REG_FILE] = DT_REG,
+ [EROFS_FT_DIR] = DT_DIR,
+ [EROFS_FT_CHRDEV] = DT_CHR,
+ [EROFS_FT_BLKDEV] = DT_BLK,
+ [EROFS_FT_FIFO] = DT_FIFO,
+ [EROFS_FT_SOCK] = DT_SOCK,
+ [EROFS_FT_SYMLINK] = DT_LNK,
+};
+
+static int erofs_fill_dentries(struct dir_context *ctx,
+ void *dentry_blk, unsigned *ofs,
+ unsigned nameoff, unsigned maxsize)
+{
+ struct erofs_dirent *de = dentry_blk;
+ const struct erofs_dirent *end = dentry_blk + nameoff;
+
+ de = dentry_blk + *ofs;
+ while (de < end) {
+ const char *de_name;
+ int de_namelen;
+ unsigned char d_type;
+#ifdef CONFIG_EROFS_FS_DEBUG
+ unsigned dbg_namelen;
+ unsigned char dbg_namebuf[EROFS_NAME_LEN];
+#endif
+
+ if (unlikely(de->file_type < EROFS_FT_MAX))
+ d_type = erofs_filetype_table[de->file_type];
+ else
+ d_type = DT_UNKNOWN;
+
+ nameoff = le16_to_cpu(de->nameoff);
+ de_name = (char *)dentry_blk + nameoff;
+
+ de_namelen = unlikely(de + 1 >= end) ?
+ /* last directory entry */
+ strnlen(de_name, maxsize - nameoff) :
+ le16_to_cpu(de[1].nameoff) - nameoff;
+
+ /* the corrupted directory found */
+ BUG_ON(de_namelen < 0);
+
+#ifdef CONFIG_EROFS_FS_DEBUG
+ dbg_namelen = min(EROFS_NAME_LEN - 1, de_namelen);
+ memcpy(dbg_namebuf, de_name, dbg_namelen);
+ dbg_namebuf[dbg_namelen] = '\0';
+
+ debugln("%s, found de_name %s de_len %d d_type %d", __func__,
+ dbg_namebuf, de_namelen, d_type);
+#endif
+
+ if (!dir_emit(ctx, de_name, de_namelen,
+ le64_to_cpu(de->nid), d_type))
+ /* stoped by some reason */
+ return 1;
+ ++de;
+ *ofs += sizeof(struct erofs_dirent);
+ }
+ *ofs = maxsize;
+ return 0;
+}
+
+static int erofs_readdir(struct file *f, struct dir_context *ctx)
+{
+ struct inode *dir = file_inode(f);
+ struct address_space *mapping = dir->i_mapping;
+ const size_t dirsize = i_size_read(dir);
+ unsigned i = ctx->pos / EROFS_BLKSIZ;
+ unsigned ofs = ctx->pos % EROFS_BLKSIZ;
+ int err = 0;
+ bool initial = true;
+
+ while (ctx->pos < dirsize) {
+ struct page *dentry_page;
+ struct erofs_dirent *de;
+ unsigned nameoff, maxsize;
+
+ dentry_page = read_mapping_page(mapping, i, NULL);
+ if (IS_ERR(dentry_page))
+ continue;
+
+ lock_page(dentry_page);
+ de = (struct erofs_dirent *)kmap(dentry_page);
+
+ nameoff = le16_to_cpu(de->nameoff);
+
+ if (unlikely(nameoff < sizeof(struct erofs_dirent) ||
+ nameoff >= PAGE_SIZE)) {
+ errln("%s, invalid de[0].nameoff %u",
+ __func__, nameoff);
+
+ err = -EIO;
+ goto skip_this;
+ }
+
+ maxsize = min_t(unsigned, dirsize - ctx->pos + ofs, PAGE_SIZE);
+
+ /* search dirents at the arbitrary position */
+ if (unlikely(initial)) {
+ initial = false;
+
+ ofs = roundup(ofs, sizeof(struct erofs_dirent));
+ if (unlikely(ofs >= nameoff))
+ goto skip_this;
+ }
+
+ err = erofs_fill_dentries(ctx, de, &ofs, nameoff, maxsize);
+skip_this:
+ kunmap(dentry_page);
+
+ unlock_page(dentry_page);
+ put_page(dentry_page);
+
+ ctx->pos = blknr_to_addr(i) + ofs;
+
+ if (unlikely(err))
+ break;
+ ++i;
+ ofs = 0;
+ }
+ return err < 0 ? err : 0;
+}
+
+const struct file_operations erofs_dir_fops = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate = erofs_readdir,
+};
+
diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h
new file mode 100644
index 000000000000..2f8e2bf70941
--- /dev/null
+++ b/drivers/staging/erofs/erofs_fs.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Apache-2.0
+ *
+ * linux/drivers/staging/erofs/erofs_fs.h
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is dual-licensed; you may select either the GNU General Public
+ * License version 2 or Apache License, Version 2.0. See the file COPYING
+ * in the main directory of the Linux distribution for more details.
+ */
+#ifndef __EROFS_FS_H
+#define __EROFS_FS_H
+
+/* Enhanced(Extended) ROM File System */
+#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2
+#define EROFS_SUPER_OFFSET 1024
+
+struct erofs_super_block {
+/* 0 */__le32 magic; /* in the little endian */
+/* 4 */__le32 checksum; /* crc32c(super_block) */
+/* 8 */__le32 features;
+/* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */
+/* 13 */__u8 reserved;
+
+/* 14 */__le16 root_nid;
+/* 16 */__le64 inos; /* total valid ino # (== f_files - f_favail) */
+
+/* 24 */__le64 build_time; /* inode v1 time derivation */
+/* 32 */__le32 build_time_nsec;
+/* 36 */__le32 blocks; /* used for statfs */
+/* 40 */__le32 meta_blkaddr;
+/* 44 */__le32 xattr_blkaddr;
+/* 48 */__u8 uuid[16]; /* 128-bit uuid for volume */
+/* 64 */__u8 volume_name[16]; /* volume name */
+
+/* 80 */__u8 reserved2[48]; /* 128 bytes */
+} __packed;
+
+#define __EROFS_BIT(_prefix, _cur, _pre) enum { \
+ _prefix ## _cur ## _BIT = _prefix ## _pre ## _BIT + \
+ _prefix ## _pre ## _BITS }
+
+/*
+ * erofs inode data mapping:
+ * 0 - inode plain without inline data A:
+ * inode, [xattrs], ... | ... | no-holed data
+ * 1 - inode VLE compression B:
+ * inode, [xattrs], extents ... | ...
+ * 2 - inode plain with inline data C:
+ * inode, [xattrs], last_inline_data, ... | ... | no-holed data
+ * 3~7 - reserved
+ */
+enum {
+ EROFS_INODE_LAYOUT_PLAIN,
+ EROFS_INODE_LAYOUT_COMPRESSION,
+ EROFS_INODE_LAYOUT_INLINE,
+ EROFS_INODE_LAYOUT_MAX
+};
+#define EROFS_I_VERSION_BITS 1
+#define EROFS_I_DATA_MAPPING_BITS 3
+
+#define EROFS_I_VERSION_BIT 0
+__EROFS_BIT(EROFS_I_, DATA_MAPPING, VERSION);
+
+struct erofs_inode_v1 {
+/* 0 */__le16 i_advise;
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+/* 2 */__le16 i_xattr_icount;
+/* 4 */__le16 i_mode;
+/* 6 */__le16 i_nlink;
+/* 8 */__le32 i_size;
+/* 12 */__le32 i_reserved;
+/* 16 */union {
+ /* file total compressed blocks for data mapping 1 */
+ __le32 compressed_blocks;
+ __le32 raw_blkaddr;
+
+ /* for device files, used to indicate old/new device # */
+ __le32 rdev;
+ } i_u __packed;
+/* 20 */__le32 i_ino; /* only used for 32-bit stat compatibility */
+/* 24 */__le16 i_uid;
+/* 26 */__le16 i_gid;
+/* 28 */__le32 i_checksum;
+} __packed;
+
+/* 32 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V1 0
+/* 64 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V2 1
+
+struct erofs_inode_v2 {
+ __le16 i_advise;
+
+ /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+ __le16 i_xattr_icount;
+ __le16 i_mode;
+ __le16 i_reserved; /* 8 bytes */
+ __le64 i_size; /* 16 bytes */
+ union {
+ /* file total compressed blocks for data mapping 1 */
+ __le32 compressed_blocks;
+ __le32 raw_blkaddr;
+
+ /* for device files, used to indicate old/new device # */
+ __le32 rdev;
+ } i_u __packed;
+
+ /* only used for 32-bit stat compatibility */
+ __le32 i_ino; /* 24 bytes */
+
+ __le32 i_uid;
+ __le32 i_gid;
+ __le64 i_ctime; /* 32 bytes */
+ __le32 i_ctime_nsec;
+ __le32 i_nlink;
+ __u8 i_reserved2[12];
+ __le32 i_checksum; /* 64 bytes */
+} __packed;
+
+#define EROFS_MAX_SHARED_XATTRS (128)
+/* h_shared_count between 129 ... 255 are special # */
+#define EROFS_SHARED_XATTR_EXTENT (255)
+
+/*
+ * inline xattrs (n == i_xattr_icount):
+ * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
+ * 12 bytes / \
+ * / \
+ * /-----------------------\
+ * | erofs_xattr_entries+ |
+ * +-----------------------+
+ * inline xattrs must starts in erofs_xattr_ibody_header,
+ * for read-only fs, no need to introduce h_refcount
+ */
+struct erofs_xattr_ibody_header {
+ __le32 h_checksum;
+ __u8 h_shared_count;
+ __u8 h_reserved[7];
+ __le32 h_shared_xattrs[0]; /* shared xattr id array */
+} __packed;
+
+/* Name indexes */
+#define EROFS_XATTR_INDEX_USER 1
+#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2
+#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EROFS_XATTR_INDEX_TRUSTED 4
+#define EROFS_XATTR_INDEX_LUSTRE 5
+#define EROFS_XATTR_INDEX_SECURITY 6
+
+/* xattr entry (for both inline & shared xattrs) */
+struct erofs_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+ __le16 e_value_size; /* size of attribute value */
+ /* followed by e_name and e_value */
+ char e_name[0]; /* attribute name */
+} __packed;
+
+#define ondisk_xattr_ibody_size(count) ({\
+ u32 __count = le16_to_cpu(count); \
+ ((__count) == 0) ? 0 : \
+ sizeof(struct erofs_xattr_ibody_header) + \
+ sizeof(__u32) * ((__count) - 1); })
+
+#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
+#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
+ sizeof(struct erofs_xattr_entry) + \
+ (entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
+
+/* have to be aligned with 8 bytes on disk */
+struct erofs_extent_header {
+ __le32 eh_checksum;
+ __le32 eh_reserved[3];
+} __packed;
+
+/*
+ * Z_EROFS Variable-sized Logical Extent cluster type:
+ * 0 - literal (uncompressed) cluster
+ * 1 - compressed cluster (for the head logical cluster)
+ * 2 - compressed cluster (for the other logical clusters)
+ *
+ * In detail,
+ * 0 - literal (uncompressed) cluster,
+ * di_advise = 0
+ * di_clusterofs = the literal data offset of the cluster
+ * di_blkaddr = the blkaddr of the literal cluster
+ *
+ * 1 - compressed cluster (for the head logical cluster)
+ * di_advise = 1
+ * di_clusterofs = the decompressed data offset of the cluster
+ * di_blkaddr = the blkaddr of the compressed cluster
+ *
+ * 2 - compressed cluster (for the other logical clusters)
+ * di_advise = 2
+ * di_clusterofs =
+ * the decompressed data offset in its own head cluster
+ * di_u.delta[0] = distance to its corresponding head cluster
+ * di_u.delta[1] = distance to its corresponding tail cluster
+ * (di_advise could be 0, 1 or 2)
+ */
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
+
+struct z_erofs_vle_decompressed_index {
+ __le16 di_advise;
+ /* where to decompress in the head cluster */
+ __le16 di_clusterofs;
+
+ union {
+ /* for the head cluster */
+ __le32 blkaddr;
+ /*
+ * for the rest clusters
+ * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
+ * [0] - pointing to the head cluster
+ * [1] - pointing to the tail cluster
+ */
+ __le16 delta[2];
+ } di_u __packed; /* 8 bytes */
+} __packed;
+
+#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
+ sizeof(struct z_erofs_vle_decompressed_index))
+
+/* dirent sorts in alphabet order, thus we can do binary search */
+struct erofs_dirent {
+ __le64 nid; /* 0, node number */
+ __le16 nameoff; /* 8, start offset of file name */
+ __u8 file_type; /* 10, file type */
+ __u8 reserved; /* 11, reserved */
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+ EROFS_FT_UNKNOWN,
+ EROFS_FT_REG_FILE,
+ EROFS_FT_DIR,
+ EROFS_FT_CHRDEV,
+ EROFS_FT_BLKDEV,
+ EROFS_FT_FIFO,
+ EROFS_FT_SOCK,
+ EROFS_FT_SYMLINK,
+ EROFS_FT_MAX
+};
+
+#define EROFS_NAME_LEN 255
+
+/* check the EROFS on-disk layout strictly at compile time */
+static inline void erofs_check_ondisk_layout_definitions(void)
+{
+ BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);
+ BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
+ BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+ BUILD_BUG_ON(sizeof(struct erofs_extent_header) != 16);
+ BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+ BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+}
+
+#endif
+
diff --git a/drivers/staging/erofs/include/linux/tagptr.h b/drivers/staging/erofs/include/linux/tagptr.h
new file mode 100644
index 000000000000..ccd106dbd48e
--- /dev/null
+++ b/drivers/staging/erofs/include/linux/tagptr.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Tagged pointer implementation
+ *
+ * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef _LINUX_TAGPTR_H
+#define _LINUX_TAGPTR_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/*
+ * the name of tagged pointer types are tagptr{1, 2, 3...}_t
+ * avoid directly using the internal structs __tagptr{1, 2, 3...}
+ */
+#define __MAKE_TAGPTR(n) \
+typedef struct __tagptr##n { \
+ uintptr_t v; \
+} tagptr##n##_t;
+
+__MAKE_TAGPTR(1)
+__MAKE_TAGPTR(2)
+__MAKE_TAGPTR(3)
+__MAKE_TAGPTR(4)
+
+#undef __MAKE_TAGPTR
+
+extern void __compiletime_error("bad tagptr tags")
+ __bad_tagptr_tags(void);
+
+extern void __compiletime_error("bad tagptr type")
+ __bad_tagptr_type(void);
+
+/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
+#define __tagptr_mask_1(ptr, n) \
+ __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
+ (1UL << (n)) - 1 :
+
+#define __tagptr_mask(ptr) (\
+ __tagptr_mask_1(ptr, 1) ( \
+ __tagptr_mask_1(ptr, 2) ( \
+ __tagptr_mask_1(ptr, 3) ( \
+ __tagptr_mask_1(ptr, 4) ( \
+ __bad_tagptr_type(), 0)))))
+
+/* generate a tagged pointer from a raw value */
+#define tagptr_init(type, val) \
+ ((typeof(type)){ .v = (uintptr_t)(val) })
+
+/*
+ * directly cast a tagged pointer to the native pointer type, which
+ * could be used for backward compatibility of existing code.
+ */
+#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
+
+/* encode tagged pointers */
+#define tagptr_fold(type, ptr, _tags) ({ \
+ const typeof(_tags) tags = (_tags); \
+ if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
+ __bad_tagptr_tags(); \
+tagptr_init(type, (uintptr_t)(ptr) | tags); })
+
+/* decode tagged pointers */
+#define tagptr_unfold_ptr(tptr) \
+ ((void *)((tptr).v & ~__tagptr_mask(tptr)))
+
+#define tagptr_unfold_tags(tptr) \
+ ((tptr).v & __tagptr_mask(tptr))
+
+/* operations for the tagger pointer */
+#define tagptr_eq(_tptr1, _tptr2) ({ \
+ typeof(_tptr1) tptr1 = (_tptr1); \
+ typeof(_tptr2) tptr2 = (_tptr2); \
+ (void)(&tptr1 == &tptr2); \
+(tptr1).v == (tptr2).v; })
+
+/* lock-free CAS operation */
+#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
+ typeof(_ptptr) ptptr = (_ptptr); \
+ typeof(_o) o = (_o); \
+ typeof(_n) n = (_n); \
+ (void)(&o == &n); \
+ (void)(&o == ptptr); \
+tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
+
+/* wrap WRITE_ONCE if atomic update is needed */
+#define tagptr_replace_tags(_ptptr, tags) ({ \
+ typeof(_ptptr) ptptr = (_ptptr); \
+ *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
+*ptptr; })
+
+#define tagptr_set_tags(_ptptr, _tags) ({ \
+ typeof(_ptptr) ptptr = (_ptptr); \
+ const typeof(_tags) tags = (_tags); \
+ if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+ __bad_tagptr_tags(); \
+ ptptr->v |= tags; \
+*ptptr; })
+
+#define tagptr_clear_tags(_ptptr, _tags) ({ \
+ typeof(_ptptr) ptptr = (_ptptr); \
+ const typeof(_tags) tags = (_tags); \
+ if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+ __bad_tagptr_tags(); \
+ ptptr->v &= ~tags; \
+*ptptr; })
+
+#endif
+
diff --git a/drivers/staging/erofs/include/trace/events/erofs.h b/drivers/staging/erofs/include/trace/events/erofs.h
new file mode 100644
index 000000000000..5aead93a762f
--- /dev/null
+++ b/drivers/staging/erofs/include/trace/events/erofs.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM erofs
+
+#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EROFS_H
+
+#include <linux/tracepoint.h>
+
+#define show_dev(dev) MAJOR(dev), MINOR(dev)
+#define show_dev_nid(entry) show_dev(entry->dev), entry->nid
+
+#define show_file_type(type) \
+ __print_symbolic(type, \
+ { 0, "FILE" }, \
+ { 1, "DIR" })
+
+#define show_map_flags(flags) __print_flags(flags, "|", \
+ { EROFS_GET_BLOCKS_RAW, "RAW" })
+
+#define show_mflags(flags) __print_flags(flags, "", \
+ { EROFS_MAP_MAPPED, "M" }, \
+ { EROFS_MAP_META, "I" }, \
+ { EROFS_MAP_ZIPPED, "Z" })
+
+TRACE_EVENT(erofs_lookup,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(erofs_nid_t, nid )
+ __field(const char *, name )
+ __field(unsigned int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->nid = EROFS_V(dir)->nid;
+ __entry->name = dentry->d_name.name;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x",
+ show_dev_nid(__entry),
+ __entry->name,
+ __entry->flags)
+);
+
+TRACE_EVENT(erofs_fill_inode,
+ TP_PROTO(struct inode *inode, int isdir),
+ TP_ARGS(inode, isdir),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(erofs_nid_t, nid )
+ __field(erofs_blk_t, blkaddr )
+ __field(unsigned int, ofs )
+ __field(int, isdir )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = EROFS_V(inode)->nid;
+ __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
+ __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
+ __entry->isdir = isdir;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d",
+ show_dev_nid(__entry),
+ __entry->blkaddr, __entry->ofs,
+ __entry->isdir)
+);
+
+TRACE_EVENT(erofs_readpage,
+
+ TP_PROTO(struct page *page, bool raw),
+
+ TP_ARGS(page, raw),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(erofs_nid_t, nid )
+ __field(int, dir )
+ __field(pgoff_t, index )
+ __field(int, uptodate)
+ __field(bool, raw )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ __entry->nid = EROFS_V(page->mapping->host)->nid;
+ __entry->dir = S_ISDIR(page->mapping->host->i_mode);
+ __entry->index = page->index;
+ __entry->uptodate = PageUptodate(page);
+ __entry->raw = raw;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d "
+ "raw = %d",
+ show_dev_nid(__entry),
+ show_file_type(__entry->dir),
+ (unsigned long)__entry->index,
+ __entry->uptodate,
+ __entry->raw)
+);
+
+TRACE_EVENT(erofs_readpages,
+
+ TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage,
+ bool raw),
+
+ TP_ARGS(inode, page, nrpage, raw),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(erofs_nid_t, nid )
+ __field(pgoff_t, start )
+ __field(unsigned int, nrpage )
+ __field(bool, raw )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = EROFS_V(inode)->nid;
+ __entry->start = page->index;
+ __entry->nrpage = nrpage;
+ __entry->raw = raw;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d",
+ show_dev_nid(__entry),
+ (unsigned long)__entry->start,
+ __entry->nrpage,
+ __entry->raw)
+);
+
+DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
+ TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+ unsigned int flags),
+
+ TP_ARGS(inode, map, flags),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( erofs_nid_t, nid )
+ __field( erofs_off_t, la )
+ __field( u64, llen )
+ __field( unsigned int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = EROFS_V(inode)->nid;
+ __entry->la = map->m_la;
+ __entry->llen = map->m_llen;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s",
+ show_dev_nid(__entry),
+ __entry->la, __entry->llen, show_map_flags(__entry->flags))
+);
+
+DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
+ TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+ unsigned flags),
+
+ TP_ARGS(inode, map, flags)
+);
+
+DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
+ TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+ unsigned int flags, int ret),
+
+ TP_ARGS(inode, map, flags, ret),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( erofs_nid_t, nid )
+ __field( unsigned int, flags )
+ __field( erofs_off_t, la )
+ __field( erofs_off_t, pa )
+ __field( u64, llen )
+ __field( u64, plen )
+ __field( unsigned int, mflags )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = EROFS_V(inode)->nid;
+ __entry->flags = flags;
+ __entry->la = map->m_la;
+ __entry->pa = map->m_pa;
+ __entry->llen = map->m_llen;
+ __entry->plen = map->m_plen;
+ __entry->mflags = map->m_flags;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu, flags %s "
+ "la %llu pa %llu llen %llu plen %llu mflags %s ret %d",
+ show_dev_nid(__entry), show_map_flags(__entry->flags),
+ __entry->la, __entry->pa, __entry->llen, __entry->plen,
+ show_mflags(__entry->mflags), __entry->ret)
+);
+
+DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
+ TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+ unsigned flags, int ret),
+
+ TP_ARGS(inode, map, flags, ret)
+);
+
+TRACE_EVENT(erofs_destroy_inode,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( erofs_nid_t, nid )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = EROFS_V(inode)->nid;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
+);
+
+#endif /* _TRACE_EROFS_H */
+
+ /* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
new file mode 100644
index 000000000000..fbf6ff25cd1b
--- /dev/null
+++ b/drivers/staging/erofs/inode.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/inode.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "xattr.h"
+
+#include <trace/events/erofs.h>
+
+/* no locking */
+static int read_inode(struct inode *inode, void *data)
+{
+ struct erofs_vnode *vi = EROFS_V(inode);
+ struct erofs_inode_v1 *v1 = data;
+ const unsigned advise = le16_to_cpu(v1->i_advise);
+
+ vi->data_mapping_mode = __inode_data_mapping(advise);
+
+ if (unlikely(vi->data_mapping_mode >= EROFS_INODE_LAYOUT_MAX)) {
+ errln("unknown data mapping mode %u of nid %llu",
+ vi->data_mapping_mode, vi->nid);
+ DBG_BUGON(1);
+ return -EIO;
+ }
+
+ if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) {
+ struct erofs_inode_v2 *v2 = data;
+
+ vi->inode_isize = sizeof(struct erofs_inode_v2);
+ vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
+
+ inode->i_mode = le16_to_cpu(v2->i_mode);
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) {
+ vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr);
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ inode->i_rdev =
+ new_decode_dev(le32_to_cpu(v2->i_u.rdev));
+ } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ inode->i_rdev = 0;
+ } else {
+ return -EIO;
+ }
+
+ i_uid_write(inode, le32_to_cpu(v2->i_uid));
+ i_gid_write(inode, le32_to_cpu(v2->i_gid));
+ set_nlink(inode, le32_to_cpu(v2->i_nlink));
+
+ /* ns timestamp */
+ inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+ le64_to_cpu(v2->i_ctime);
+ inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+ le32_to_cpu(v2->i_ctime_nsec);
+
+ inode->i_size = le64_to_cpu(v2->i_size);
+ } else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) {
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+ vi->inode_isize = sizeof(struct erofs_inode_v1);
+ vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
+
+ inode->i_mode = le16_to_cpu(v1->i_mode);
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) {
+ vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr);
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ inode->i_rdev =
+ new_decode_dev(le32_to_cpu(v1->i_u.rdev));
+ } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ inode->i_rdev = 0;
+ } else {
+ return -EIO;
+ }
+
+ i_uid_write(inode, le16_to_cpu(v1->i_uid));
+ i_gid_write(inode, le16_to_cpu(v1->i_gid));
+ set_nlink(inode, le16_to_cpu(v1->i_nlink));
+
+ /* use build time to derive all file time */
+ inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+ sbi->build_time;
+ inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+ sbi->build_time_nsec;
+
+ inode->i_size = le32_to_cpu(v1->i_size);
+ } else {
+ errln("unsupported on-disk inode version %u of nid %llu",
+ __inode_version(advise), vi->nid);
+ DBG_BUGON(1);
+ return -EIO;
+ }
+
+ /* measure inode.i_blocks as the generic filesystem */
+ inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
+ return 0;
+}
+
+/*
+ * try_lock can be required since locking order is:
+ * file data(fs_inode)
+ * meta(bd_inode)
+ * but the majority of the callers is "iget",
+ * in that case we are pretty sure no deadlock since
+ * no data operations exist. However I tend to
+ * try_lock since it takes no much overhead and
+ * will success immediately.
+ */
+static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs)
+{
+ struct erofs_vnode *vi = EROFS_V(inode);
+ struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+ int mode = vi->data_mapping_mode;
+
+ DBG_BUGON(mode >= EROFS_INODE_LAYOUT_MAX);
+
+ /* should be inode inline C */
+ if (mode != EROFS_INODE_LAYOUT_INLINE)
+ return 0;
+
+ /* fast symlink (following ext4) */
+ if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
+ char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);
+
+ if (unlikely(lnk == NULL))
+ return -ENOMEM;
+
+ m_pofs += vi->inode_isize + vi->xattr_isize;
+ BUG_ON(m_pofs + inode->i_size > PAGE_SIZE);
+
+ /* get in-page inline data */
+ memcpy(lnk, data + m_pofs, inode->i_size);
+ lnk[inode->i_size] = '\0';
+
+ inode->i_link = lnk;
+ set_inode_fast_symlink(inode);
+ }
+ return -EAGAIN;
+}
+
+static int fill_inode(struct inode *inode, int isdir)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+ struct erofs_vnode *vi = EROFS_V(inode);
+ struct page *page;
+ void *data;
+ int err;
+ erofs_blk_t blkaddr;
+ unsigned ofs;
+
+ trace_erofs_fill_inode(inode, isdir);
+
+ blkaddr = erofs_blknr(iloc(sbi, vi->nid));
+ ofs = erofs_blkoff(iloc(sbi, vi->nid));
+
+ debugln("%s, reading inode nid %llu at %u of blkaddr %u",
+ __func__, vi->nid, ofs, blkaddr);
+
+ page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir);
+
+ if (IS_ERR(page)) {
+ errln("failed to get inode (nid: %llu) page, err %ld",
+ vi->nid, PTR_ERR(page));
+ return PTR_ERR(page);
+ }
+
+ BUG_ON(!PageUptodate(page));
+ data = page_address(page);
+
+ err = read_inode(inode, data + ofs);
+ if (!err) {
+ /* setup the new inode */
+ if (S_ISREG(inode->i_mode)) {
+#ifdef CONFIG_EROFS_FS_XATTR
+ if (vi->xattr_isize)
+ inode->i_op = &erofs_generic_xattr_iops;
+#endif
+ inode->i_fop = &generic_ro_fops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op =
+#ifdef CONFIG_EROFS_FS_XATTR
+ vi->xattr_isize ? &erofs_dir_xattr_iops :
+#endif
+ &erofs_dir_iops;
+ inode->i_fop = &erofs_dir_fops;
+ } else if (S_ISLNK(inode->i_mode)) {
+ /* by default, page_get_link is used for symlink */
+ inode->i_op =
+#ifdef CONFIG_EROFS_FS_XATTR
+ &erofs_symlink_xattr_iops,
+#else
+ &page_symlink_inode_operations;
+#endif
+ inode_nohighmem(inode);
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+#ifdef CONFIG_EROFS_FS_XATTR
+ inode->i_op = &erofs_special_inode_operations;
+#endif
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ } else {
+ err = -EIO;
+ goto out_unlock;
+ }
+
+ if (is_inode_layout_compression(inode)) {
+#ifdef CONFIG_EROFS_FS_ZIP
+ inode->i_mapping->a_ops =
+ &z_erofs_vle_normalaccess_aops;
+#else
+ err = -ENOTSUPP;
+#endif
+ goto out_unlock;
+ }
+
+ inode->i_mapping->a_ops = &erofs_raw_access_aops;
+
+ /* fill last page if inline data is available */
+ fill_inline_data(inode, data, ofs);
+ }
+
+out_unlock:
+ unlock_page(page);
+ put_page(page);
+ return err;
+}
+
+struct inode *erofs_iget(struct super_block *sb,
+ erofs_nid_t nid, bool isdir)
+{
+ struct inode *inode = iget_locked(sb, nid);
+
+ if (unlikely(inode == NULL))
+ return ERR_PTR(-ENOMEM);
+
+ if (inode->i_state & I_NEW) {
+ int err;
+ struct erofs_vnode *vi = EROFS_V(inode);
+ vi->nid = nid;
+
+ err = fill_inode(inode, isdir);
+ if (likely(!err))
+ unlock_new_inode(inode);
+ else {
+ iget_failed(inode);
+ inode = ERR_PTR(err);
+ }
+ }
+ return inode;
+}
+
+#ifdef CONFIG_EROFS_FS_XATTR
+const struct inode_operations erofs_generic_xattr_iops = {
+ .listxattr = erofs_listxattr,
+};
+#endif
+
+#ifdef CONFIG_EROFS_FS_XATTR
+const struct inode_operations erofs_symlink_xattr_iops = {
+ .get_link = page_get_link,
+ .listxattr = erofs_listxattr,
+};
+#endif
+
+const struct inode_operations erofs_special_inode_operations = {
+#ifdef CONFIG_EROFS_FS_XATTR
+ .listxattr = erofs_listxattr,
+#endif
+};
+
+#ifdef CONFIG_EROFS_FS_XATTR
+const struct inode_operations erofs_fast_symlink_xattr_iops = {
+ .get_link = simple_get_link,
+ .listxattr = erofs_listxattr,
+};
+#endif
+
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
new file mode 100644
index 000000000000..367b39fe46e5
--- /dev/null
+++ b/drivers/staging/erofs/internal.h
@@ -0,0 +1,556 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * linux/drivers/staging/erofs/internal.h
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#ifndef __INTERNAL_H
+#define __INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/cleancache.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "erofs_fs.h"
+
+/* redefine pr_fmt "erofs: " */
+#undef pr_fmt
+#define pr_fmt(fmt) "erofs: " fmt
+
+#define errln(x, ...) pr_err(x "\n", ##__VA_ARGS__)
+#define infoln(x, ...) pr_info(x "\n", ##__VA_ARGS__)
+#ifdef CONFIG_EROFS_FS_DEBUG
+#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__)
+
+#define dbg_might_sleep might_sleep
+#define DBG_BUGON BUG_ON
+#else
+#define debugln(x, ...) ((void)0)
+
+#define dbg_might_sleep() ((void)0)
+#define DBG_BUGON(...) ((void)0)
+#endif
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+enum {
+ FAULT_KMALLOC,
+ FAULT_MAX,
+};
+
+extern char *erofs_fault_name[FAULT_MAX];
+#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
+
+struct erofs_fault_info {
+ atomic_t inject_ops;
+ unsigned int inject_rate;
+ unsigned int inject_type;
+};
+#endif
+
+#ifdef CONFIG_EROFS_FS_ZIP_CACHE_BIPOLAR
+#define EROFS_FS_ZIP_CACHE_LVL (2)
+#elif defined(EROFS_FS_ZIP_CACHE_UNIPOLAR)
+#define EROFS_FS_ZIP_CACHE_LVL (1)
+#else
+#define EROFS_FS_ZIP_CACHE_LVL (0)
+#endif
+
+#if (!defined(EROFS_FS_HAS_MANAGED_CACHE) && (EROFS_FS_ZIP_CACHE_LVL > 0))
+#define EROFS_FS_HAS_MANAGED_CACHE
+#endif
+
+/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */
+#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1
+
+typedef u64 erofs_nid_t;
+
+struct erofs_sb_info {
+ /* list for all registered superblocks, mainly for shrinker */
+ struct list_head list;
+ struct mutex umount_mutex;
+
+ u32 blocks;
+ u32 meta_blkaddr;
+#ifdef CONFIG_EROFS_FS_XATTR
+ u32 xattr_blkaddr;
+#endif
+
+ /* inode slot unit size in bit shift */
+ unsigned char islotbits;
+#ifdef CONFIG_EROFS_FS_ZIP
+ /* cluster size in bit shift */
+ unsigned char clusterbits;
+
+ /* the dedicated workstation for compression */
+ struct radix_tree_root workstn_tree;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ struct inode *managed_cache;
+#endif
+
+#endif
+
+ u32 build_time_nsec;
+ u64 build_time;
+
+ /* what we really care is nid, rather than ino.. */
+ erofs_nid_t root_nid;
+ /* used for statfs, f_files - f_favail */
+ u64 inos;
+
+ u8 uuid[16]; /* 128-bit uuid for volume */
+ u8 volume_name[16]; /* volume name */
+ char *dev_name;
+
+ unsigned int mount_opt;
+ unsigned int shrinker_run_no;
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+ struct erofs_fault_info fault_info; /* For fault injection */
+#endif
+};
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+#define erofs_show_injection_info(type) \
+ infoln("inject %s in %s of %pS", erofs_fault_name[type], \
+ __func__, __builtin_return_address(0))
+
+static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
+{
+ struct erofs_fault_info *ffi = &sbi->fault_info;
+
+ if (!ffi->inject_rate)
+ return false;
+
+ if (!IS_FAULT_SET(ffi, type))
+ return false;
+
+ atomic_inc(&ffi->inject_ops);
+ if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
+ atomic_set(&ffi->inject_ops, 0);
+ return true;
+ }
+ return false;
+}
+#endif
+
+static inline void *erofs_kmalloc(struct erofs_sb_info *sbi,
+ size_t size, gfp_t flags)
+{
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_KMALLOC)) {
+ erofs_show_injection_info(FAULT_KMALLOC);
+ return NULL;
+ }
+#endif
+ return kmalloc(size, flags);
+}
+
+#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
+#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info)
+
+/* Mount flags set via mount options or defaults */
+#define EROFS_MOUNT_XATTR_USER 0x00000010
+#define EROFS_MOUNT_POSIX_ACL 0x00000020
+#define EROFS_MOUNT_FAULT_INJECTION 0x00000040
+
+#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
+#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option)
+#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option)
+
+#ifdef CONFIG_EROFS_FS_ZIP
+#define erofs_workstn_lock(sbi) xa_lock(&(sbi)->workstn_tree)
+#define erofs_workstn_unlock(sbi) xa_unlock(&(sbi)->workstn_tree)
+
+/* basic unit of the workstation of a super_block */
+struct erofs_workgroup {
+ /* the workgroup index in the workstation */
+ pgoff_t index;
+
+ /* overall workgroup reference count */
+ atomic_t refcount;
+};
+
+#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
+
+static inline bool erofs_workgroup_try_to_freeze(
+ struct erofs_workgroup *grp, int v)
+{
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ if (v != atomic_cmpxchg(&grp->refcount,
+ v, EROFS_LOCKED_MAGIC))
+ return false;
+ preempt_disable();
+#else
+ preempt_disable();
+ if (atomic_read(&grp->refcount) != v) {
+ preempt_enable();
+ return false;
+ }
+#endif
+ return true;
+}
+
+static inline void erofs_workgroup_unfreeze(
+ struct erofs_workgroup *grp, int v)
+{
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ atomic_set(&grp->refcount, v);
+#endif
+ preempt_enable();
+}
+
+static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
+{
+ const int locked = (int)EROFS_LOCKED_MAGIC;
+ int o;
+
+repeat:
+ o = atomic_read(&grp->refcount);
+
+ /* spin if it is temporarily locked at the reclaim path */
+ if (unlikely(o == locked)) {
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ do
+ cpu_relax();
+ while (atomic_read(&grp->refcount) == locked);
+#endif
+ goto repeat;
+ }
+
+ if (unlikely(o <= 0))
+ return -1;
+
+ if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
+ goto repeat;
+
+ *ocnt = o;
+ return 0;
+}
+
+#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
+
+extern int erofs_workgroup_put(struct erofs_workgroup *grp);
+
+extern struct erofs_workgroup *erofs_find_workgroup(
+ struct super_block *sb, pgoff_t index, bool *tag);
+
+extern int erofs_register_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp, bool tag);
+
+extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+ unsigned long nr_shrink, bool cleanup);
+
+static inline void erofs_workstation_cleanup_all(struct super_block *sb)
+{
+ erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true);
+}
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+#define EROFS_UNALLOCATED_CACHED_PAGE ((void *)0x5F0EF00D)
+
+extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *egrp);
+extern int erofs_try_to_free_cached_page(struct address_space *mapping,
+ struct page *page);
+#endif
+
+#endif
+
+/* we strictly follow PAGE_SIZE and no buffer head yet */
+#define LOG_BLOCK_SIZE PAGE_SHIFT
+
+#undef LOG_SECTORS_PER_BLOCK
+#define LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9)
+
+#undef SECTORS_PER_BLOCK
+#define SECTORS_PER_BLOCK (1 << SECTORS_PER_BLOCK)
+
+#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE)
+
+#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
+#error erofs cannot be used in this platform
+#endif
+
+#define ROOT_NID(sb) ((sb)->root_nid)
+
+#ifdef CONFIG_EROFS_FS_ZIP
+/* hard limit of pages per compressed cluster */
+#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
+
+/* page count of a compressed cluster */
+#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE)
+#endif
+
+typedef u64 erofs_off_t;
+
+/* data type for filesystem-wide blocks number */
+typedef u32 erofs_blk_t;
+
+#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
+#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
+#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
+
+static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
+{
+ return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
+}
+
+#define inode_set_inited_xattr(inode) (EROFS_V(inode)->flags |= 1)
+#define inode_has_inited_xattr(inode) (EROFS_V(inode)->flags & 1)
+
+struct erofs_vnode {
+ erofs_nid_t nid;
+ unsigned int flags;
+
+ unsigned char data_mapping_mode;
+ /* inline size in bytes */
+ unsigned char inode_isize;
+ unsigned short xattr_isize;
+
+ unsigned xattr_shared_count;
+ unsigned *xattr_shared_xattrs;
+
+ erofs_blk_t raw_blkaddr;
+
+ /* the corresponding vfs inode */
+ struct inode vfs_inode;
+};
+
+#define EROFS_V(ptr) \
+ container_of(ptr, struct erofs_vnode, vfs_inode)
+
+#define __inode_advise(x, bit, bits) \
+ (((x) >> (bit)) & ((1 << (bits)) - 1))
+
+#define __inode_version(advise) \
+ __inode_advise(advise, EROFS_I_VERSION_BIT, \
+ EROFS_I_VERSION_BITS)
+
+#define __inode_data_mapping(advise) \
+ __inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\
+ EROFS_I_DATA_MAPPING_BITS)
+
+static inline unsigned long inode_datablocks(struct inode *inode)
+{
+ /* since i_size cannot be changed */
+ return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+}
+
+static inline bool is_inode_layout_plain(struct inode *inode)
+{
+ return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_PLAIN;
+}
+
+static inline bool is_inode_layout_compression(struct inode *inode)
+{
+ return EROFS_V(inode)->data_mapping_mode ==
+ EROFS_INODE_LAYOUT_COMPRESSION;
+}
+
+static inline bool is_inode_layout_inline(struct inode *inode)
+{
+ return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_INLINE;
+}
+
+extern const struct super_operations erofs_sops;
+extern const struct inode_operations erofs_dir_iops;
+extern const struct file_operations erofs_dir_fops;
+
+extern const struct address_space_operations erofs_raw_access_aops;
+#ifdef CONFIG_EROFS_FS_ZIP
+extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
+#endif
+
+/*
+ * Logical to physical block mapping, used by erofs_map_blocks()
+ *
+ * Different with other file systems, it is used for 2 access modes:
+ *
+ * 1) RAW access mode:
+ *
+ * Users pass a valid (m_lblk, m_lofs -- usually 0) pair,
+ * and get the valid m_pblk, m_pofs and the longest m_len(in bytes).
+ *
+ * Note that m_lblk in the RAW access mode refers to the number of
+ * the compressed ondisk block rather than the uncompressed
+ * in-memory block for the compressed file.
+ *
+ * m_pofs equals to m_lofs except for the inline data page.
+ *
+ * 2) Normal access mode:
+ *
+ * If the inode is not compressed, it has no difference with
+ * the RAW access mode. However, if the inode is compressed,
+ * users should pass a valid (m_lblk, m_lofs) pair, and get
+ * the needed m_pblk, m_pofs, m_len to get the compressed data
+ * and the updated m_lblk, m_lofs which indicates the start
+ * of the corresponding uncompressed data in the file.
+ */
+enum {
+ BH_Zipped = BH_PrivateStart,
+};
+
+/* Has a disk mapping */
+#define EROFS_MAP_MAPPED (1 << BH_Mapped)
+/* Located in metadata (could be copied from bd_inode) */
+#define EROFS_MAP_META (1 << BH_Meta)
+/* The extent has been compressed */
+#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
+
+struct erofs_map_blocks {
+ erofs_off_t m_pa, m_la;
+ u64 m_plen, m_llen;
+
+ unsigned int m_flags;
+};
+
+/* Flags used by erofs_map_blocks() */
+#define EROFS_GET_BLOCKS_RAW 0x0001
+
+/* data.c */
+static inline struct bio *prepare_bio(
+ struct super_block *sb,
+ erofs_blk_t blkaddr, unsigned nr_pages,
+ bio_end_io_t endio)
+{
+ gfp_t gfp = GFP_NOIO;
+ struct bio *bio = bio_alloc(gfp, nr_pages);
+
+ if (unlikely(bio == NULL) &&
+ (current->flags & PF_MEMALLOC)) {
+ do {
+ nr_pages /= 2;
+ if (unlikely(!nr_pages)) {
+ bio = bio_alloc(gfp | __GFP_NOFAIL, 1);
+ BUG_ON(bio == NULL);
+ break;
+ }
+ bio = bio_alloc(gfp, nr_pages);
+ } while (bio == NULL);
+ }
+
+ bio->bi_end_io = endio;
+ bio_set_dev(bio, sb->s_bdev);
+ bio->bi_iter.bi_sector = blkaddr << LOG_SECTORS_PER_BLOCK;
+ return bio;
+}
+
+static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags)
+{
+ bio_set_op_attrs(bio, op, op_flags);
+ submit_bio(bio);
+}
+
+extern struct page *erofs_get_meta_page(struct super_block *sb,
+ erofs_blk_t blkaddr, bool prio);
+extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
+extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *,
+ struct page **, int);
+
+struct erofs_map_blocks_iter {
+ struct erofs_map_blocks map;
+ struct page *mpage;
+};
+
+
+static inline struct page *erofs_get_inline_page(struct inode *inode,
+ erofs_blk_t blkaddr)
+{
+ return erofs_get_meta_page(inode->i_sb,
+ blkaddr, S_ISDIR(inode->i_mode));
+}
+
+/* inode.c */
+extern struct inode *erofs_iget(struct super_block *sb,
+ erofs_nid_t nid, bool dir);
+
+/* dir.c */
+int erofs_namei(struct inode *dir, struct qstr *name,
+ erofs_nid_t *nid, unsigned *d_type);
+
+/* xattr.c */
+#ifdef CONFIG_EROFS_FS_XATTR
+extern const struct xattr_handler *erofs_xattr_handlers[];
+#endif
+
+/* symlink */
+#ifdef CONFIG_EROFS_FS_XATTR
+extern const struct inode_operations erofs_symlink_xattr_iops;
+extern const struct inode_operations erofs_fast_symlink_xattr_iops;
+extern const struct inode_operations erofs_special_inode_operations;
+#endif
+
+static inline void set_inode_fast_symlink(struct inode *inode)
+{
+#ifdef CONFIG_EROFS_FS_XATTR
+ inode->i_op = &erofs_fast_symlink_xattr_iops;
+#else
+ inode->i_op = &simple_symlink_inode_operations;
+#endif
+}
+
+static inline bool is_inode_fast_symlink(struct inode *inode)
+{
+#ifdef CONFIG_EROFS_FS_XATTR
+ return inode->i_op == &erofs_fast_symlink_xattr_iops;
+#else
+ return inode->i_op == &simple_symlink_inode_operations;
+#endif
+}
+
+static inline void *erofs_vmap(struct page **pages, unsigned int count)
+{
+#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM
+ int i = 0;
+
+ while (1) {
+ void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);
+ /* retry two more times (totally 3 times) */
+ if (addr != NULL || ++i >= 3)
+ return addr;
+ vm_unmap_aliases();
+ }
+ return NULL;
+#else
+ return vmap(pages, count, VM_MAP, PAGE_KERNEL);
+#endif
+}
+
+static inline void erofs_vunmap(const void *mem, unsigned int count)
+{
+#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM
+ vm_unmap_ram(mem, count);
+#else
+ vunmap(mem);
+#endif
+}
+
+/* utils.c */
+extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
+
+extern void erofs_register_super(struct super_block *sb);
+extern void erofs_unregister_super(struct super_block *sb);
+
+extern unsigned long erofs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+extern unsigned long erofs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+
+#ifndef lru_to_page
+#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+#endif
+
+#endif
+
diff --git a/drivers/staging/erofs/lz4defs.h b/drivers/staging/erofs/lz4defs.h
new file mode 100644
index 000000000000..00a0b58a0871
--- /dev/null
+++ b/drivers/staging/erofs/lz4defs.h
@@ -0,0 +1,227 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
+/*
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/string.h> /* memset, memcpy */
+
+#define FORCE_INLINE __always_inline
+
+/*-************************************
+ * Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ * Architecture specifics
+ **************************************/
+#if defined(CONFIG_64BIT)
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
+#endif
+
+/*-************************************
+ * Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression run slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6
+
+#define HASH_UNIT sizeof(size_t)
+
+#define KB (1 << 10)
+#define MB (1 << 20)
+#define GB (1U << 30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS 4
+#define ML_MASK ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+
+/*-************************************
+ * Reading and writing into memory
+ **************************************/
+static FORCE_INLINE U16 LZ4_read16(const void *ptr)
+{
+ return get_unaligned((const U16 *)ptr);
+}
+
+static FORCE_INLINE U32 LZ4_read32(const void *ptr)
+{
+ return get_unaligned((const U32 *)ptr);
+}
+
+static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
+{
+ return get_unaligned((const size_t *)ptr);
+}
+
+static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
+{
+ put_unaligned(value, (U16 *)memPtr);
+}
+
+static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
+{
+ put_unaligned(value, (U32 *)memPtr);
+}
+
+static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
+{
+ return get_unaligned_le16(memPtr);
+}
+
+static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
+{
+ return put_unaligned_le16(value, memPtr);
+}
+
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+{
+#if LZ4_ARCH64
+ U64 a = get_unaligned((const U64 *)src);
+
+ put_unaligned(a, (U64 *)dst);
+#else
+ U32 a = get_unaligned((const U32 *)src);
+ U32 b = get_unaligned((const U32 *)src + 1);
+
+ put_unaligned(a, (U32 *)dst);
+ put_unaligned(b, (U32 *)dst + 1);
+#endif
+}
+
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+ const void *srcPtr, void *dstEnd)
+{
+ BYTE *d = (BYTE *)dstPtr;
+ const BYTE *s = (const BYTE *)srcPtr;
+ BYTE *const e = (BYTE *)dstEnd;
+
+ do {
+ LZ4_copy8(d, s);
+ d += 8;
+ s += 8;
+ } while (d < e);
+}
+
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+ return __ffs(val) >> 3;
+#else
+ return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
+#endif
+}
+
+static FORCE_INLINE unsigned int LZ4_count(
+ const BYTE *pIn,
+ const BYTE *pMatch,
+ const BYTE *pInLimit)
+{
+ const BYTE *const pStart = pIn;
+
+ while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+ size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+ if (!diff) {
+ pIn += STEPSIZE;
+ pMatch += STEPSIZE;
+ continue;
+ }
+
+ pIn += LZ4_NbCommonBytes(diff);
+
+ return (unsigned int)(pIn - pStart);
+ }
+
+#if LZ4_ARCH64
+ if ((pIn < (pInLimit - 3))
+ && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+ pIn += 4;
+ pMatch += 4;
+ }
+#endif
+
+ if ((pIn < (pInLimit - 1))
+ && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+ pIn += 2;
+ pMatch += 2;
+ }
+
+ if ((pIn < pInLimit) && (*pMatch == *pIn))
+ pIn++;
+
+ return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
new file mode 100644
index 000000000000..546a47156101
--- /dev/null
+++ b/drivers/staging/erofs/namei.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/namei.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "internal.h"
+#include "xattr.h"
+
+#include <trace/events/erofs.h>
+
+/* based on the value of qn->len is accurate */
+static inline int dirnamecmp(struct qstr *qn,
+ struct qstr *qd, unsigned *matched)
+{
+ unsigned i = *matched, len = min(qn->len, qd->len);
+loop:
+ if (unlikely(i >= len)) {
+ *matched = i;
+ if (qn->len < qd->len) {
+ /*
+ * actually (qn->len == qd->len)
+ * when qd->name[i] == '\0'
+ */
+ return qd->name[i] == '\0' ? 0 : -1;
+ }
+ return (qn->len > qd->len);
+ }
+
+ if (qn->name[i] != qd->name[i]) {
+ *matched = i;
+ return qn->name[i] > qd->name[i] ? 1 : -1;
+ }
+
+ ++i;
+ goto loop;
+}
+
+static struct erofs_dirent *find_target_dirent(
+ struct qstr *name,
+ u8 *data, int maxsize)
+{
+ unsigned ndirents, head, back;
+ unsigned startprfx, endprfx;
+ struct erofs_dirent *const de = (struct erofs_dirent *)data;
+
+ /* make sure that maxsize is valid */
+ BUG_ON(maxsize < sizeof(struct erofs_dirent));
+
+ ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
+
+ /* corrupted dir (may be unnecessary...) */
+ BUG_ON(!ndirents);
+
+ head = 0;
+ back = ndirents - 1;
+ startprfx = endprfx = 0;
+
+ while (head <= back) {
+ unsigned mid = head + (back - head) / 2;
+ unsigned nameoff = le16_to_cpu(de[mid].nameoff);
+ unsigned matched = min(startprfx, endprfx);
+
+ struct qstr dname = QSTR_INIT(data + nameoff,
+ unlikely(mid >= ndirents - 1) ?
+ maxsize - nameoff :
+ le16_to_cpu(de[mid + 1].nameoff) - nameoff);
+
+ /* string comparison without already matched prefix */
+ int ret = dirnamecmp(name, &dname, &matched);
+
+ if (unlikely(!ret))
+ return de + mid;
+ else if (ret > 0) {
+ head = mid + 1;
+ startprfx = matched;
+ } else if (unlikely(mid < 1)) /* fix "mid" overflow */
+ break;
+ else {
+ back = mid - 1;
+ endprfx = matched;
+ }
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct page *find_target_block_classic(
+ struct inode *dir,
+ struct qstr *name, int *_diff)
+{
+ unsigned startprfx, endprfx;
+ unsigned head, back;
+ struct address_space *const mapping = dir->i_mapping;
+ struct page *candidate = ERR_PTR(-ENOENT);
+
+ startprfx = endprfx = 0;
+ head = 0;
+ back = inode_datablocks(dir) - 1;
+
+ while (head <= back) {
+ unsigned mid = head + (back - head) / 2;
+ struct page *page = read_mapping_page(mapping, mid, NULL);
+
+ if (IS_ERR(page)) {
+exact_out:
+ if (!IS_ERR(candidate)) /* valid candidate */
+ put_page(candidate);
+ return page;
+ } else {
+ int diff;
+ unsigned ndirents, matched;
+ struct qstr dname;
+ struct erofs_dirent *de = kmap_atomic(page);
+ unsigned nameoff = le16_to_cpu(de->nameoff);
+
+ ndirents = nameoff / sizeof(*de);
+
+ /* corrupted dir (should have one entry at least) */
+ BUG_ON(!ndirents || nameoff > PAGE_SIZE);
+
+ matched = min(startprfx, endprfx);
+
+ dname.name = (u8 *)de + nameoff;
+ dname.len = ndirents == 1 ?
+ /* since the rest of the last page is 0 */
+ EROFS_BLKSIZ - nameoff
+ : le16_to_cpu(de[1].nameoff) - nameoff;
+
+ /* string comparison without already matched prefix */
+ diff = dirnamecmp(name, &dname, &matched);
+ kunmap_atomic(de);
+
+ if (unlikely(!diff)) {
+ *_diff = 0;
+ goto exact_out;
+ } else if (diff > 0) {
+ head = mid + 1;
+ startprfx = matched;
+
+ if (likely(!IS_ERR(candidate)))
+ put_page(candidate);
+ candidate = page;
+ } else {
+ put_page(page);
+
+ if (unlikely(mid < 1)) /* fix "mid" overflow */
+ break;
+
+ back = mid - 1;
+ endprfx = matched;
+ }
+ }
+ }
+ *_diff = 1;
+ return candidate;
+}
+
+int erofs_namei(struct inode *dir,
+ struct qstr *name,
+ erofs_nid_t *nid, unsigned *d_type)
+{
+ int diff;
+ struct page *page;
+ u8 *data;
+ struct erofs_dirent *de;
+
+ if (unlikely(!dir->i_size))
+ return -ENOENT;
+
+ diff = 1;
+ page = find_target_block_classic(dir, name, &diff);
+
+ if (unlikely(IS_ERR(page)))
+ return PTR_ERR(page);
+
+ data = kmap_atomic(page);
+ /* the target page has been mapped */
+ de = likely(diff) ?
+ /* since the rest of the last page is 0 */
+ find_target_dirent(name, data, EROFS_BLKSIZ) :
+ (struct erofs_dirent *)data;
+
+ if (likely(!IS_ERR(de))) {
+ *nid = le64_to_cpu(de->nid);
+ *d_type = de->file_type;
+ }
+
+ kunmap_atomic(data);
+ put_page(page);
+
+ return PTR_ERR_OR_ZERO(de);
+}
+
+/* NOTE: i_mutex is already held by vfs */
+static struct dentry *erofs_lookup(struct inode *dir,
+ struct dentry *dentry, unsigned int flags)
+{
+ int err;
+ erofs_nid_t nid;
+ unsigned d_type;
+ struct inode *inode;
+
+ DBG_BUGON(!d_really_is_negative(dentry));
+ /* dentry must be unhashed in lookup, no need to worry about */
+ DBG_BUGON(!d_unhashed(dentry));
+
+ trace_erofs_lookup(dir, dentry, flags);
+
+ /* file name exceeds fs limit */
+ if (unlikely(dentry->d_name.len > EROFS_NAME_LEN))
+ return ERR_PTR(-ENAMETOOLONG);
+
+ /* false uninitialized warnings on gcc 4.8.x */
+ err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
+
+ if (err == -ENOENT) {
+ /* negative dentry */
+ inode = NULL;
+ goto negative_out;
+ } else if (unlikely(err))
+ return ERR_PTR(err);
+
+ debugln("%s, %s (nid %llu) found, d_type %u", __func__,
+ dentry->d_name.name, nid, d_type);
+
+ inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+negative_out:
+ return d_splice_alias(inode, dentry);
+}
+
+const struct inode_operations erofs_dir_iops = {
+ .lookup = erofs_lookup,
+};
+
+const struct inode_operations erofs_dir_xattr_iops = {
+ .lookup = erofs_lookup,
+#ifdef CONFIG_EROFS_FS_XATTR
+ .listxattr = erofs_listxattr,
+#endif
+};
+
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
new file mode 100644
index 000000000000..1aec509c805f
--- /dev/null
+++ b/drivers/staging/erofs/super.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/super.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/statfs.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/erofs.h>
+
+static struct kmem_cache *erofs_inode_cachep __read_mostly;
+
+static void init_once(void *ptr)
+{
+ struct erofs_vnode *vi = ptr;
+
+ inode_init_once(&vi->vfs_inode);
+}
+
+static int erofs_init_inode_cache(void)
+{
+ erofs_inode_cachep = kmem_cache_create("erofs_inode",
+ sizeof(struct erofs_vnode), 0,
+ SLAB_RECLAIM_ACCOUNT, init_once);
+
+ return erofs_inode_cachep != NULL ? 0 : -ENOMEM;
+}
+
+static void erofs_exit_inode_cache(void)
+{
+ BUG_ON(erofs_inode_cachep == NULL);
+ kmem_cache_destroy(erofs_inode_cachep);
+}
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+ struct erofs_vnode *vi =
+ kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+
+ if (vi == NULL)
+ return NULL;
+
+ /* zero out everything except vfs_inode */
+ memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
+ return &vi->vfs_inode;
+}
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct erofs_vnode *vi = EROFS_V(inode);
+
+ /* be careful RCU symlink path (see ext4_inode_info->i_data)! */
+ if (is_inode_fast_symlink(inode))
+ kfree(inode->i_link);
+
+ kfree(vi->xattr_shared_xattrs);
+
+ kmem_cache_free(erofs_inode_cachep, vi);
+}
+
+static void destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, i_callback);
+}
+
+static int superblock_read(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi;
+ struct buffer_head *bh;
+ struct erofs_super_block *layout;
+ unsigned blkszbits;
+ int ret;
+
+ bh = sb_bread(sb, 0);
+
+ if (bh == NULL) {
+ errln("cannot read erofs superblock");
+ return -EIO;
+ }
+
+ sbi = EROFS_SB(sb);
+ layout = (struct erofs_super_block *)((u8 *)bh->b_data
+ + EROFS_SUPER_OFFSET);
+
+ ret = -EINVAL;
+ if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
+ errln("cannot find valid erofs superblock");
+ goto out;
+ }
+
+ blkszbits = layout->blkszbits;
+ /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
+ if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
+ errln("blksize %u isn't supported on this platform",
+ 1 << blkszbits);
+ goto out;
+ }
+
+ sbi->blocks = le32_to_cpu(layout->blocks);
+ sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
+#ifdef CONFIG_EROFS_FS_XATTR
+ sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr);
+#endif
+ sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
+#ifdef CONFIG_EROFS_FS_ZIP
+ sbi->clusterbits = 12;
+
+ if (1 << (sbi->clusterbits - 12) > Z_EROFS_CLUSTER_MAX_PAGES)
+ errln("clusterbits %u is not supported on this kernel",
+ sbi->clusterbits);
+#endif
+
+ sbi->root_nid = le16_to_cpu(layout->root_nid);
+ sbi->inos = le64_to_cpu(layout->inos);
+
+ sbi->build_time = le64_to_cpu(layout->build_time);
+ sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
+
+ memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
+ memcpy(sbi->volume_name, layout->volume_name,
+ sizeof(layout->volume_name));
+
+ ret = 0;
+out:
+ brelse(bh);
+ return ret;
+}
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+char *erofs_fault_name[FAULT_MAX] = {
+ [FAULT_KMALLOC] = "kmalloc",
+};
+
+static void erofs_build_fault_attr(struct erofs_sb_info *sbi,
+ unsigned int rate)
+{
+ struct erofs_fault_info *ffi = &sbi->fault_info;
+
+ if (rate) {
+ atomic_set(&ffi->inject_ops, 0);
+ ffi->inject_rate = rate;
+ ffi->inject_type = (1 << FAULT_MAX) - 1;
+ } else {
+ memset(ffi, 0, sizeof(struct erofs_fault_info));
+ }
+}
+#endif
+
+static void default_options(struct erofs_sb_info *sbi)
+{
+#ifdef CONFIG_EROFS_FS_XATTR
+ set_opt(sbi, XATTR_USER);
+#endif
+
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+ set_opt(sbi, POSIX_ACL);
+#endif
+}
+
+enum {
+ Opt_user_xattr,
+ Opt_nouser_xattr,
+ Opt_acl,
+ Opt_noacl,
+ Opt_fault_injection,
+ Opt_err
+};
+
+static match_table_t erofs_tokens = {
+ {Opt_user_xattr, "user_xattr"},
+ {Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+ {Opt_fault_injection, "fault_injection=%u"},
+ {Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
+ int arg = 0;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ args[0].to = args[0].from = NULL;
+ token = match_token(p, erofs_tokens, args);
+
+ switch (token) {
+#ifdef CONFIG_EROFS_FS_XATTR
+ case Opt_user_xattr:
+ set_opt(EROFS_SB(sb), XATTR_USER);
+ break;
+ case Opt_nouser_xattr:
+ clear_opt(EROFS_SB(sb), XATTR_USER);
+ break;
+#else
+ case Opt_user_xattr:
+ infoln("user_xattr options not supported");
+ break;
+ case Opt_nouser_xattr:
+ infoln("nouser_xattr options not supported");
+ break;
+#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+ case Opt_acl:
+ set_opt(EROFS_SB(sb), POSIX_ACL);
+ break;
+ case Opt_noacl:
+ clear_opt(EROFS_SB(sb), POSIX_ACL);
+ break;
+#else
+ case Opt_acl:
+ infoln("acl options not supported");
+ break;
+ case Opt_noacl:
+ infoln("noacl options not supported");
+ break;
+#endif
+ case Opt_fault_injection:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+ erofs_build_fault_attr(EROFS_SB(sb), arg);
+ set_opt(EROFS_SB(sb), FAULT_INJECTION);
+#else
+ infoln("FAULT_INJECTION was not selected");
+#endif
+ break;
+ default:
+ errln("Unrecognized mount option \"%s\" "
+ "or missing value", p);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+
+static const struct address_space_operations managed_cache_aops;
+
+static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
+{
+ int ret = 1; /* 0 - busy */
+ struct address_space *const mapping = page->mapping;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(mapping->a_ops != &managed_cache_aops);
+
+ if (PagePrivate(page))
+ ret = erofs_try_to_free_cached_page(mapping, page);
+
+ return ret;
+}
+
+static void managed_cache_invalidatepage(struct page *page,
+ unsigned int offset, unsigned int length)
+{
+ const unsigned int stop = length + offset;
+
+ BUG_ON(!PageLocked(page));
+
+ /* Check for overflow */
+ BUG_ON(stop > PAGE_SIZE || stop < length);
+
+ if (offset == 0 && stop == PAGE_SIZE)
+ while (!managed_cache_releasepage(page, GFP_NOFS))
+ cond_resched();
+}
+
+static const struct address_space_operations managed_cache_aops = {
+ .releasepage = managed_cache_releasepage,
+ .invalidatepage = managed_cache_invalidatepage,
+};
+
+static struct inode *erofs_init_managed_cache(struct super_block *sb)
+{
+ struct inode *inode = new_inode(sb);
+
+ if (unlikely(inode == NULL))
+ return ERR_PTR(-ENOMEM);
+
+ set_nlink(inode, 1);
+ inode->i_size = OFFSET_MAX;
+
+ inode->i_mapping->a_ops = &managed_cache_aops;
+ mapping_set_gfp_mask(inode->i_mapping,
+ GFP_NOFS | __GFP_HIGHMEM |
+ __GFP_MOVABLE | __GFP_NOFAIL);
+ return inode;
+}
+
+#endif
+
+static int erofs_read_super(struct super_block *sb,
+ const char *dev_name, void *data, int silent)
+{
+ struct inode *inode;
+ struct erofs_sb_info *sbi;
+ int err = -EINVAL;
+
+ infoln("read_super, device -> %s", dev_name);
+ infoln("options -> %s", (char *)data);
+
+ if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
+ errln("failed to set erofs blksize");
+ goto err;
+ }
+
+ sbi = kzalloc(sizeof(struct erofs_sb_info), GFP_KERNEL);
+ if (unlikely(sbi == NULL)) {
+ err = -ENOMEM;
+ goto err;
+ }
+ sb->s_fs_info = sbi;
+
+ err = superblock_read(sb);
+ if (err)
+ goto err_sbread;
+
+ sb->s_magic = EROFS_SUPER_MAGIC;
+ sb->s_flags |= MS_RDONLY | MS_NOATIME;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_time_gran = 1;
+
+ sb->s_op = &erofs_sops;
+
+#ifdef CONFIG_EROFS_FS_XATTR
+ sb->s_xattr = erofs_xattr_handlers;
+#endif
+
+ /* set erofs default mount options */
+ default_options(sbi);
+
+ err = parse_options(sb, data);
+ if (err)
+ goto err_parseopt;
+
+ if (!silent)
+ infoln("root inode @ nid %llu", ROOT_NID(sbi));
+
+#ifdef CONFIG_EROFS_FS_ZIP
+ INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+#endif
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ sbi->managed_cache = erofs_init_managed_cache(sb);
+ if (IS_ERR(sbi->managed_cache)) {
+ err = PTR_ERR(sbi->managed_cache);
+ goto err_init_managed_cache;
+ }
+#endif
+
+ /* get the root inode */
+ inode = erofs_iget(sb, ROOT_NID(sbi), true);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto err_iget;
+ }
+
+ if (!S_ISDIR(inode->i_mode)) {
+ errln("rootino(nid %llu) is not a directory(i_mode %o)",
+ ROOT_NID(sbi), inode->i_mode);
+ err = -EINVAL;
+ goto err_isdir;
+ }
+
+ sb->s_root = d_make_root(inode);
+ if (sb->s_root == NULL) {
+ err = -ENOMEM;
+ goto err_makeroot;
+ }
+
+ /* save the device name to sbi */
+ sbi->dev_name = __getname();
+ if (sbi->dev_name == NULL) {
+ err = -ENOMEM;
+ goto err_devname;
+ }
+
+ snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
+ sbi->dev_name[PATH_MAX - 1] = '\0';
+
+ erofs_register_super(sb);
+
+ /*
+ * We already have a positive dentry, which was instantiated
+ * by d_make_root. Just need to d_rehash it.
+ */
+ d_rehash(sb->s_root);
+
+ if (!silent)
+ infoln("mounted on %s with opts: %s.", dev_name,
+ (char *)data);
+ return 0;
+ /*
+ * please add a label for each exit point and use
+ * the following name convention, thus new features
+ * can be integrated easily without renaming labels.
+ */
+err_devname:
+ dput(sb->s_root);
+err_makeroot:
+err_isdir:
+ if (sb->s_root == NULL)
+ iput(inode);
+err_iget:
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ iput(sbi->managed_cache);
+err_init_managed_cache:
+#endif
+err_parseopt:
+err_sbread:
+ sb->s_fs_info = NULL;
+ kfree(sbi);
+err:
+ return err;
+}
+
+/*
+ * could be triggered after deactivate_locked_super()
+ * is called, thus including umount and failed to initialize.
+ */
+static void erofs_put_super(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ /* for cases which are failed in "read_super" */
+ if (sbi == NULL)
+ return;
+
+ WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
+
+ infoln("unmounted for %s", sbi->dev_name);
+ __putname(sbi->dev_name);
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ iput(sbi->managed_cache);
+#endif
+
+ mutex_lock(&sbi->umount_mutex);
+
+#ifdef CONFIG_EROFS_FS_ZIP
+ erofs_workstation_cleanup_all(sb);
+#endif
+
+ erofs_unregister_super(sb);
+ mutex_unlock(&sbi->umount_mutex);
+
+ kfree(sbi);
+ sb->s_fs_info = NULL;
+}
+
+
+struct erofs_mount_private {
+ const char *dev_name;
+ char *options;
+};
+
+/* support mount_bdev() with options */
+static int erofs_fill_super(struct super_block *sb,
+ void *_priv, int silent)
+{
+ struct erofs_mount_private *priv = _priv;
+
+ return erofs_read_super(sb, priv->dev_name,
+ priv->options, silent);
+}
+
+static struct dentry *erofs_mount(
+ struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ struct erofs_mount_private priv = {
+ .dev_name = dev_name,
+ .options = data
+ };
+
+ return mount_bdev(fs_type, flags, dev_name,
+ &priv, erofs_fill_super);
+}
+
+static void erofs_kill_sb(struct super_block *sb)
+{
+ kill_block_super(sb);
+}
+
+static struct shrinker erofs_shrinker_info = {
+ .scan_objects = erofs_shrink_scan,
+ .count_objects = erofs_shrink_count,
+ .seeks = DEFAULT_SEEKS,
+};
+
+static struct file_system_type erofs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "erofs",
+ .mount = erofs_mount,
+ .kill_sb = erofs_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("erofs");
+
+#ifdef CONFIG_EROFS_FS_ZIP
+extern int z_erofs_init_zip_subsystem(void);
+extern void z_erofs_exit_zip_subsystem(void);
+#endif
+
+static int __init erofs_module_init(void)
+{
+ int err;
+
+ erofs_check_ondisk_layout_definitions();
+ infoln("initializing erofs " EROFS_VERSION);
+
+ err = erofs_init_inode_cache();
+ if (err)
+ goto icache_err;
+
+ err = register_shrinker(&erofs_shrinker_info);
+ if (err)
+ goto shrinker_err;
+
+#ifdef CONFIG_EROFS_FS_ZIP
+ err = z_erofs_init_zip_subsystem();
+ if (err)
+ goto zip_err;
+#endif
+
+ err = register_filesystem(&erofs_fs_type);
+ if (err)
+ goto fs_err;
+
+ infoln("successfully to initialize erofs");
+ return 0;
+
+fs_err:
+#ifdef CONFIG_EROFS_FS_ZIP
+ z_erofs_exit_zip_subsystem();
+zip_err:
+#endif
+ unregister_shrinker(&erofs_shrinker_info);
+shrinker_err:
+ erofs_exit_inode_cache();
+icache_err:
+ return err;
+}
+
+static void __exit erofs_module_exit(void)
+{
+ unregister_filesystem(&erofs_fs_type);
+#ifdef CONFIG_EROFS_FS_ZIP
+ z_erofs_exit_zip_subsystem();
+#endif
+ unregister_shrinker(&erofs_shrinker_info);
+ erofs_exit_inode_cache();
+ infoln("successfully finalize erofs");
+}
+
+/* get filesystem statistics */
+static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+
+ buf->f_type = sb->s_magic;
+ buf->f_bsize = EROFS_BLKSIZ;
+ buf->f_blocks = sbi->blocks;
+ buf->f_bfree = buf->f_bavail = 0;
+
+ buf->f_files = ULLONG_MAX;
+ buf->f_ffree = ULLONG_MAX - sbi->inos;
+
+ buf->f_namelen = EROFS_NAME_LEN;
+
+ buf->f_fsid.val[0] = (u32)id;
+ buf->f_fsid.val[1] = (u32)(id >> 32);
+ return 0;
+}
+
+static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
+
+#ifdef CONFIG_EROFS_FS_XATTR
+ if (test_opt(sbi, XATTR_USER))
+ seq_puts(seq, ",user_xattr");
+ else
+ seq_puts(seq, ",nouser_xattr");
+#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+ if (test_opt(sbi, POSIX_ACL))
+ seq_puts(seq, ",acl");
+ else
+ seq_puts(seq, ",noacl");
+#endif
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+ if (test_opt(sbi, FAULT_INJECTION))
+ seq_printf(seq, ",fault_injection=%u",
+ sbi->fault_info.inject_rate);
+#endif
+ return 0;
+}
+
+static int erofs_remount(struct super_block *sb, int *flags, char *data)
+{
+ BUG_ON(!sb_rdonly(sb));
+
+ *flags |= MS_RDONLY;
+ return 0;
+}
+
+const struct super_operations erofs_sops = {
+ .put_super = erofs_put_super,
+ .alloc_inode = alloc_inode,
+ .destroy_inode = destroy_inode,
+ .statfs = erofs_statfs,
+ .show_options = erofs_show_options,
+ .remount_fs = erofs_remount,
+};
+
+module_init(erofs_module_init);
+module_exit(erofs_module_exit);
+
+MODULE_DESCRIPTION("Enhanced ROM File System");
+MODULE_AUTHOR("Gao Xiang, Yu Chao, Miao Xie, CONSUMER BG, HUAWEI Inc.");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/staging/erofs/unzip_lz4.c b/drivers/staging/erofs/unzip_lz4.c
new file mode 100644
index 000000000000..b1ea23f66c4e
--- /dev/null
+++ b/drivers/staging/erofs/unzip_lz4.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * linux/drivers/staging/erofs/unzip_lz4.c
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * Original code taken from 'linux/lib/lz4/lz4_decompress.c'
+ */
+
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ *
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+ */
+#include "internal.h"
+#include <asm/unaligned.h>
+#include "lz4defs.h"
+
+/*
+ * no public solution to solve our requirement yet.
+ * see: <required buffer size for LZ4_decompress_safe_partial>
+ * https://groups.google.com/forum/#!topic/lz4c/_3kkz5N6n00
+ */
+static FORCE_INLINE int customized_lz4_decompress_safe_partial(
+ const void * const source,
+ void * const dest,
+ int inputSize,
+ int outputSize)
+{
+ /* Local Variables */
+ const BYTE *ip = (const BYTE *) source;
+ const BYTE * const iend = ip + inputSize;
+
+ BYTE *op = (BYTE *) dest;
+ BYTE * const oend = op + outputSize;
+ BYTE *cpy;
+
+ static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
+ static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
+
+ /* Empty output buffer */
+ if (unlikely(outputSize == 0))
+ return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+ /* Main Loop : decode sequences */
+ while (1) {
+ size_t length;
+ const BYTE *match;
+ size_t offset;
+
+ /* get literal length */
+ unsigned int const token = *ip++;
+
+ length = token>>ML_BITS;
+
+ if (length == RUN_MASK) {
+ unsigned int s;
+
+ do {
+ s = *ip++;
+ length += s;
+ } while ((ip < iend - RUN_MASK) & (s == 255));
+
+ if (unlikely((size_t)(op + length) < (size_t)(op))) {
+ /* overflow detection */
+ goto _output_error;
+ }
+ if (unlikely((size_t)(ip + length) < (size_t)(ip))) {
+ /* overflow detection */
+ goto _output_error;
+ }
+ }
+
+ /* copy literals */
+ cpy = op + length;
+ if ((cpy > oend - WILDCOPYLENGTH) ||
+ (ip + length > iend - (2 + 1 + LASTLITERALS))) {
+ if (cpy > oend) {
+ memcpy(op, ip, length = oend - op);
+ op += length;
+ break;
+ }
+
+ if (unlikely(ip + length > iend)) {
+ /*
+ * Error :
+ * read attempt beyond
+ * end of input buffer
+ */
+ goto _output_error;
+ }
+
+ memcpy(op, ip, length);
+ ip += length;
+ op += length;
+
+ if (ip > iend - 2)
+ break;
+ /* Necessarily EOF, due to parsing restrictions */
+ /* break; */
+ } else {
+ LZ4_wildCopy(op, ip, cpy);
+ ip += length;
+ op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip);
+ ip += 2;
+ match = op - offset;
+
+ if (unlikely(match < (const BYTE *)dest)) {
+ /* Error : offset outside buffers */
+ goto _output_error;
+ }
+
+ /* get matchlength */
+ length = token & ML_MASK;
+ if (length == ML_MASK) {
+ unsigned int s;
+
+ do {
+ s = *ip++;
+
+ if (ip > iend - LASTLITERALS)
+ goto _output_error;
+
+ length += s;
+ } while (s == 255);
+
+ if (unlikely((size_t)(op + length) < (size_t)op)) {
+ /* overflow detection */
+ goto _output_error;
+ }
+ }
+
+ length += MINMATCH;
+
+ /* copy match within block */
+ cpy = op + length;
+
+ if (unlikely(cpy >= oend - WILDCOPYLENGTH)) {
+ if (cpy >= oend) {
+ while (op < oend)
+ *op++ = *match++;
+ break;
+ }
+ goto __match;
+ }
+
+ /* costs ~1%; silence an msan warning when offset == 0 */
+ LZ4_write32(op, (U32)offset);
+
+ if (unlikely(offset < 8)) {
+ const int dec64 = dec64table[offset];
+
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[offset];
+ memcpy(op + 4, match, 4);
+ match -= dec64;
+ } else {
+ LZ4_copy8(op, match);
+ match += 8;
+ }
+
+ op += 8;
+
+ if (unlikely(cpy > oend - 12)) {
+ BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+ if (op < oCopyLimit) {
+ LZ4_wildCopy(op, match, oCopyLimit);
+ match += oCopyLimit - op;
+ op = oCopyLimit;
+ }
+__match:
+ while (op < cpy)
+ *op++ = *match++;
+ } else {
+ LZ4_copy8(op, match);
+
+ if (length > 16)
+ LZ4_wildCopy(op + 8, match + 8, cpy);
+ }
+
+ op = cpy; /* correction */
+ }
+ DBG_BUGON((void *)ip - source > inputSize);
+ DBG_BUGON((void *)op - dest > outputSize);
+
+ /* Nb of output bytes decoded */
+ return (int) ((void *)op - dest);
+
+ /* Overflow error detected */
+_output_error:
+ return -ERANGE;
+}
+
+int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
+{
+ int ret = customized_lz4_decompress_safe_partial(in,
+ out, inlen, outlen);
+
+ if (ret >= 0)
+ return ret;
+
+ /*
+ * LZ4_decompress_safe will return an error code
+ * (< 0) if decompression failed
+ */
+ errln("%s, failed to decompress, in[%p, %zu] outlen[%p, %zu]",
+ __func__, in, inlen, out, outlen);
+ WARN_ON(1);
+ print_hex_dump(KERN_DEBUG, "raw data [in]: ", DUMP_PREFIX_OFFSET,
+ 16, 1, in, inlen, true);
+ print_hex_dump(KERN_DEBUG, "raw data [out]: ", DUMP_PREFIX_OFFSET,
+ 16, 1, out, outlen, true);
+ return -EIO;
+}
+
diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h
new file mode 100644
index 000000000000..0956615b86f7
--- /dev/null
+++ b/drivers/staging/erofs/unzip_pagevec.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * linux/drivers/staging/erofs/unzip_pagevec.h
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#ifndef __EROFS_UNZIP_PAGEVEC_H
+#define __EROFS_UNZIP_PAGEVEC_H
+
+#include <linux/tagptr.h>
+
+/* page type in pagevec for unzip subsystem */
+enum z_erofs_page_type {
+ /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
+ Z_EROFS_PAGE_TYPE_EXCLUSIVE,
+
+ Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
+
+ Z_EROFS_VLE_PAGE_TYPE_HEAD,
+ Z_EROFS_VLE_PAGE_TYPE_MAX
+};
+
+extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
+ __bad_page_type_exclusive(void);
+
+/* pagevec tagged pointer */
+typedef tagptr2_t erofs_vtptr_t;
+
+/* pagevec collector */
+struct z_erofs_pagevec_ctor {
+ struct page *curr, *next;
+ erofs_vtptr_t *pages;
+
+ unsigned int nr, index;
+};
+
+static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
+ bool atomic)
+{
+ if (ctor->curr == NULL)
+ return;
+
+ if (atomic)
+ kunmap_atomic(ctor->pages);
+ else
+ kunmap(ctor->curr);
+}
+
+static inline struct page *
+z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
+ unsigned nr)
+{
+ unsigned index;
+
+ /* keep away from occupied pages */
+ if (ctor->next != NULL)
+ return ctor->next;
+
+ for (index = 0; index < nr; ++index) {
+ const erofs_vtptr_t t = ctor->pages[index];
+ const unsigned tags = tagptr_unfold_tags(t);
+
+ if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
+ return tagptr_unfold_ptr(t);
+ }
+
+ if (unlikely(nr >= ctor->nr))
+ BUG();
+
+ return NULL;
+}
+
+static inline void
+z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
+ bool atomic)
+{
+ struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
+
+ z_erofs_pagevec_ctor_exit(ctor, atomic);
+
+ ctor->curr = next;
+ ctor->next = NULL;
+ ctor->pages = atomic ?
+ kmap_atomic(ctor->curr) : kmap(ctor->curr);
+
+ ctor->nr = PAGE_SIZE / sizeof(struct page *);
+ ctor->index = 0;
+}
+
+static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
+ unsigned nr,
+ erofs_vtptr_t *pages, unsigned i)
+{
+ ctor->nr = nr;
+ ctor->curr = ctor->next = NULL;
+ ctor->pages = pages;
+
+ if (i >= nr) {
+ i -= nr;
+ z_erofs_pagevec_ctor_pagedown(ctor, false);
+ while (i > ctor->nr) {
+ i -= ctor->nr;
+ z_erofs_pagevec_ctor_pagedown(ctor, false);
+ }
+ }
+
+ ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
+ ctor->index = i;
+}
+
+static inline bool
+z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
+ struct page *page,
+ enum z_erofs_page_type type,
+ bool *occupied)
+{
+ *occupied = false;
+ if (unlikely(ctor->next == NULL && type))
+ if (ctor->index + 1 == ctor->nr)
+ return false;
+
+ if (unlikely(ctor->index >= ctor->nr))
+ z_erofs_pagevec_ctor_pagedown(ctor, false);
+
+ /* exclusive page type must be 0 */
+ if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
+ __bad_page_type_exclusive();
+
+ /* should remind that collector->next never equal to 1, 2 */
+ if (type == (uintptr_t)ctor->next) {
+ ctor->next = page;
+ *occupied = true;
+ }
+
+ ctor->pages[ctor->index++] =
+ tagptr_fold(erofs_vtptr_t, page, type);
+ return true;
+}
+
+static inline struct page *
+z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
+ enum z_erofs_page_type *type)
+{
+ erofs_vtptr_t t;
+
+ if (unlikely(ctor->index >= ctor->nr)) {
+ BUG_ON(ctor->next == NULL);
+ z_erofs_pagevec_ctor_pagedown(ctor, true);
+ }
+
+ t = ctor->pages[ctor->index];
+
+ *type = tagptr_unfold_tags(t);
+
+ /* should remind that collector->next never equal to 1, 2 */
+ if (*type == (uintptr_t)ctor->next)
+ ctor->next = tagptr_unfold_ptr(t);
+
+ ctor->pages[ctor->index++] =
+ tagptr_fold(erofs_vtptr_t, NULL, 0);
+
+ return tagptr_unfold_ptr(t);
+}
+
+#endif
+
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
new file mode 100644
index 000000000000..8721f0a41d15
--- /dev/null
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -0,0 +1,1656 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/unzip_vle.c
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "unzip_vle.h"
+#include <linux/prefetch.h>
+
+static struct workqueue_struct *z_erofs_workqueue __read_mostly;
+static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
+
+void z_erofs_exit_zip_subsystem(void)
+{
+ BUG_ON(z_erofs_workqueue == NULL);
+ BUG_ON(z_erofs_workgroup_cachep == NULL);
+
+ destroy_workqueue(z_erofs_workqueue);
+ kmem_cache_destroy(z_erofs_workgroup_cachep);
+}
+
+static inline int init_unzip_workqueue(void)
+{
+ const unsigned onlinecpus = num_possible_cpus();
+
+ /*
+ * we don't need too many threads, limiting threads
+ * could improve scheduling performance.
+ */
+ z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+ onlinecpus + onlinecpus / 4);
+
+ return z_erofs_workqueue != NULL ? 0 : -ENOMEM;
+}
+
+int z_erofs_init_zip_subsystem(void)
+{
+ z_erofs_workgroup_cachep =
+ kmem_cache_create("erofs_compress",
+ Z_EROFS_WORKGROUP_SIZE, 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+
+ if (z_erofs_workgroup_cachep != NULL) {
+ if (!init_unzip_workqueue())
+ return 0;
+
+ kmem_cache_destroy(z_erofs_workgroup_cachep);
+ }
+ return -ENOMEM;
+}
+
+enum z_erofs_vle_work_role {
+ Z_EROFS_VLE_WORK_SECONDARY,
+ Z_EROFS_VLE_WORK_PRIMARY,
+ /*
+ * The current work has at least been linked with the following
+ * processed chained works, which means if the processing page
+ * is the tail partial page of the work, the current work can
+ * safely use the whole page, as illustrated below:
+ * +--------------+-------------------------------------------+
+ * | tail page | head page (of the previous work) |
+ * +--------------+-------------------------------------------+
+ * /\ which belongs to the current work
+ * [ (*) this page can be used for the current work itself. ]
+ */
+ Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
+ Z_EROFS_VLE_WORK_MAX
+};
+
+struct z_erofs_vle_work_builder {
+ enum z_erofs_vle_work_role role;
+ /*
+ * 'hosted = false' means that the current workgroup doesn't belong to
+ * the owned chained workgroups. In the other words, it is none of our
+ * business to submit this workgroup.
+ */
+ bool hosted;
+
+ struct z_erofs_vle_workgroup *grp;
+ struct z_erofs_vle_work *work;
+ struct z_erofs_pagevec_ctor vector;
+
+ /* pages used for reading the compressed data */
+ struct page **compressed_pages;
+ unsigned compressed_deficit;
+};
+
+#define VLE_WORK_BUILDER_INIT() \
+ { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+
+static bool grab_managed_cache_pages(struct address_space *mapping,
+ erofs_blk_t start,
+ struct page **compressed_pages,
+ int clusterblks,
+ bool reserve_allocation)
+{
+ bool noio = true;
+ unsigned int i;
+
+ /* TODO: optimize by introducing find_get_pages_range */
+ for (i = 0; i < clusterblks; ++i) {
+ struct page *page, *found;
+
+ if (READ_ONCE(compressed_pages[i]) != NULL)
+ continue;
+
+ page = found = find_get_page(mapping, start + i);
+ if (found == NULL) {
+ noio = false;
+ if (!reserve_allocation)
+ continue;
+ page = EROFS_UNALLOCATED_CACHED_PAGE;
+ }
+
+ if (NULL == cmpxchg(compressed_pages + i, NULL, page))
+ continue;
+
+ if (found != NULL)
+ put_page(found);
+ }
+ return noio;
+}
+
+/* called by erofs_shrinker to get rid of all compressed_pages */
+int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *egrp)
+{
+ struct z_erofs_vle_workgroup *const grp =
+ container_of(egrp, struct z_erofs_vle_workgroup, obj);
+ struct address_space *const mapping = sbi->managed_cache->i_mapping;
+ const int clusterpages = erofs_clusterpages(sbi);
+ int i;
+
+ /*
+ * refcount of workgroup is now freezed as 1,
+ * therefore no need to worry about available decompression users.
+ */
+ for (i = 0; i < clusterpages; ++i) {
+ struct page *page = grp->compressed_pages[i];
+
+ if (page == NULL || page->mapping != mapping)
+ continue;
+
+ /* block other users from reclaiming or migrating the page */
+ if (!trylock_page(page))
+ return -EBUSY;
+
+ /* barrier is implied in the following 'unlock_page' */
+ WRITE_ONCE(grp->compressed_pages[i], NULL);
+
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+
+ unlock_page(page);
+ put_page(page);
+ }
+ return 0;
+}
+
+int erofs_try_to_free_cached_page(struct address_space *mapping,
+ struct page *page)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
+ const unsigned int clusterpages = erofs_clusterpages(sbi);
+
+ struct z_erofs_vle_workgroup *grp;
+ int ret = 0; /* 0 - busy */
+
+ /* prevent the workgroup from being freed */
+ rcu_read_lock();
+ grp = (void *)page_private(page);
+
+ if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
+ unsigned int i;
+
+ for (i = 0; i < clusterpages; ++i) {
+ if (grp->compressed_pages[i] == page) {
+ WRITE_ONCE(grp->compressed_pages[i], NULL);
+ ret = 1;
+ break;
+ }
+ }
+ erofs_workgroup_unfreeze(&grp->obj, 1);
+ }
+ rcu_read_unlock();
+
+ if (ret) {
+ ClearPagePrivate(page);
+ put_page(page);
+ }
+ return ret;
+}
+#endif
+
+/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
+static inline bool try_to_reuse_as_compressed_page(
+ struct z_erofs_vle_work_builder *b,
+ struct page *page)
+{
+ while (b->compressed_deficit) {
+ --b->compressed_deficit;
+ if (NULL == cmpxchg(b->compressed_pages++, NULL, page))
+ return true;
+ }
+
+ return false;
+}
+
+/* callers must be with work->lock held */
+static int z_erofs_vle_work_add_page(
+ struct z_erofs_vle_work_builder *builder,
+ struct page *page,
+ enum z_erofs_page_type type)
+{
+ int ret;
+ bool occupied;
+
+ /* give priority for the compressed data storage */
+ if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
+ type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
+ try_to_reuse_as_compressed_page(builder, page))
+ return 0;
+
+ ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
+ page, type, &occupied);
+ builder->work->vcnt += (unsigned)ret;
+
+ return ret ? 0 : -EAGAIN;
+}
+
+static inline bool try_to_claim_workgroup(
+ struct z_erofs_vle_workgroup *grp,
+ z_erofs_vle_owned_workgrp_t *owned_head,
+ bool *hosted)
+{
+ DBG_BUGON(*hosted == true);
+
+ /* let's claim these following types of workgroup */
+retry:
+ if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
+ /* type 1, nil workgroup */
+ if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next,
+ Z_EROFS_VLE_WORKGRP_NIL, *owned_head))
+ goto retry;
+
+ *owned_head = grp;
+ *hosted = true;
+ } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
+ /*
+ * type 2, link to the end of a existing open chain,
+ * be careful that its submission itself is governed
+ * by the original owned chain.
+ */
+ if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next,
+ Z_EROFS_VLE_WORKGRP_TAIL, *owned_head))
+ goto retry;
+
+ *owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
+ } else
+ return false; /* :( better luck next time */
+
+ return true; /* lucky, I am the followee :) */
+}
+
+static struct z_erofs_vle_work *
+z_erofs_vle_work_lookup(struct super_block *sb,
+ pgoff_t idx, unsigned pageofs,
+ struct z_erofs_vle_workgroup **grp_ret,
+ enum z_erofs_vle_work_role *role,
+ z_erofs_vle_owned_workgrp_t *owned_head,
+ bool *hosted)
+{
+ bool tag, primary;
+ struct erofs_workgroup *egrp;
+ struct z_erofs_vle_workgroup *grp;
+ struct z_erofs_vle_work *work;
+
+ egrp = erofs_find_workgroup(sb, idx, &tag);
+ if (egrp == NULL) {
+ *grp_ret = NULL;
+ return NULL;
+ }
+
+ *grp_ret = grp = container_of(egrp,
+ struct z_erofs_vle_workgroup, obj);
+
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ work = z_erofs_vle_grab_work(grp, pageofs);
+ primary = true;
+#else
+ BUG();
+#endif
+
+ DBG_BUGON(work->pageofs != pageofs);
+
+ /*
+ * lock must be taken first to avoid grp->next == NIL between
+ * claiming workgroup and adding pages:
+ * grp->next != NIL
+ * grp->next = NIL
+ * mutex_unlock_all
+ * mutex_lock(&work->lock)
+ * add all pages to pagevec
+ *
+ * [correct locking case 1]:
+ * mutex_lock(grp->work[a])
+ * ...
+ * mutex_lock(grp->work[b]) mutex_lock(grp->work[c])
+ * ... *role = SECONDARY
+ * add all pages to pagevec
+ * ...
+ * mutex_unlock(grp->work[c])
+ * mutex_lock(grp->work[c])
+ * ...
+ * grp->next = NIL
+ * mutex_unlock_all
+ *
+ * [correct locking case 2]:
+ * mutex_lock(grp->work[b])
+ * ...
+ * mutex_lock(grp->work[a])
+ * ...
+ * mutex_lock(grp->work[c])
+ * ...
+ * grp->next = NIL
+ * mutex_unlock_all
+ * mutex_lock(grp->work[a])
+ * *role = PRIMARY_OWNER
+ * add all pages to pagevec
+ * ...
+ */
+ mutex_lock(&work->lock);
+
+ *hosted = false;
+ if (!primary)
+ *role = Z_EROFS_VLE_WORK_SECONDARY;
+ /* claim the workgroup if possible */
+ else if (try_to_claim_workgroup(grp, owned_head, hosted))
+ *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+ else
+ *role = Z_EROFS_VLE_WORK_PRIMARY;
+
+ return work;
+}
+
+static struct z_erofs_vle_work *
+z_erofs_vle_work_register(struct super_block *sb,
+ struct z_erofs_vle_workgroup **grp_ret,
+ struct erofs_map_blocks *map,
+ pgoff_t index, unsigned pageofs,
+ enum z_erofs_vle_work_role *role,
+ z_erofs_vle_owned_workgrp_t *owned_head,
+ bool *hosted)
+{
+ bool newgrp = false;
+ struct z_erofs_vle_workgroup *grp = *grp_ret;
+ struct z_erofs_vle_work *work;
+
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ BUG_ON(grp != NULL);
+#else
+ if (grp != NULL)
+ goto skip;
+#endif
+ /* no available workgroup, let's allocate one */
+ grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS);
+ if (unlikely(grp == NULL))
+ return ERR_PTR(-ENOMEM);
+
+ grp->obj.index = index;
+ grp->llen = map->m_llen;
+
+ z_erofs_vle_set_workgrp_fmt(grp,
+ (map->m_flags & EROFS_MAP_ZIPPED) ?
+ Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
+ Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
+ atomic_set(&grp->obj.refcount, 1);
+
+ /* new workgrps have been claimed as type 1 */
+ WRITE_ONCE(grp->next, *owned_head);
+ /* primary and followed work for all new workgrps */
+ *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+ /* it should be submitted by ourselves */
+ *hosted = true;
+
+ newgrp = true;
+#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
+skip:
+ /* currently unimplemented */
+ BUG();
+#else
+ work = z_erofs_vle_grab_primary_work(grp);
+#endif
+ work->pageofs = pageofs;
+
+ mutex_init(&work->lock);
+
+ if (newgrp) {
+ int err = erofs_register_workgroup(sb, &grp->obj, 0);
+
+ if (err) {
+ kmem_cache_free(z_erofs_workgroup_cachep, grp);
+ return ERR_PTR(-EAGAIN);
+ }
+ }
+
+ *owned_head = *grp_ret = grp;
+
+ mutex_lock(&work->lock);
+ return work;
+}
+
+static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp,
+ unsigned int llen)
+{
+ while (1) {
+ unsigned int orig_llen = grp->llen;
+
+ if (orig_llen >= llen || orig_llen ==
+ cmpxchg(&grp->llen, orig_llen, llen))
+ break;
+ }
+}
+
+#define builder_is_followed(builder) \
+ ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
+
+static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
+ struct super_block *sb,
+ struct erofs_map_blocks *map,
+ z_erofs_vle_owned_workgrp_t *owned_head)
+{
+ const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb));
+ const erofs_blk_t index = erofs_blknr(map->m_pa);
+ const unsigned pageofs = map->m_la & ~PAGE_MASK;
+ struct z_erofs_vle_workgroup *grp;
+ struct z_erofs_vle_work *work;
+
+ DBG_BUGON(builder->work != NULL);
+
+ /* must be Z_EROFS_WORK_TAIL or the next chained work */
+ DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
+ DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+ DBG_BUGON(erofs_blkoff(map->m_pa));
+
+repeat:
+ work = z_erofs_vle_work_lookup(sb, index,
+ pageofs, &grp, &builder->role, owned_head, &builder->hosted);
+ if (work != NULL) {
+ __update_workgrp_llen(grp, map->m_llen);
+ goto got_it;
+ }
+
+ work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs,
+ &builder->role, owned_head, &builder->hosted);
+
+ if (unlikely(work == ERR_PTR(-EAGAIN)))
+ goto repeat;
+
+ if (unlikely(IS_ERR(work)))
+ return PTR_ERR(work);
+got_it:
+ z_erofs_pagevec_ctor_init(&builder->vector,
+ Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt);
+
+ if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
+ /* enable possibly in-place decompression */
+ builder->compressed_pages = grp->compressed_pages;
+ builder->compressed_deficit = clusterpages;
+ } else {
+ builder->compressed_pages = NULL;
+ builder->compressed_deficit = 0;
+ }
+
+ builder->grp = grp;
+ builder->work = work;
+ return 0;
+}
+
+/*
+ * keep in mind that no referenced workgroups will be freed
+ * only after a RCU grace period, so rcu_read_lock() could
+ * prevent a workgroup from being freed.
+ */
+static void z_erofs_rcu_callback(struct rcu_head *head)
+{
+ struct z_erofs_vle_work *work = container_of(head,
+ struct z_erofs_vle_work, rcu);
+ struct z_erofs_vle_workgroup *grp =
+ z_erofs_vle_work_workgroup(work, true);
+
+ kmem_cache_free(z_erofs_workgroup_cachep, grp);
+}
+
+void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
+{
+ struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
+ struct z_erofs_vle_workgroup, obj);
+ struct z_erofs_vle_work *const work = &vgrp->work;
+
+ call_rcu(&work->rcu, z_erofs_rcu_callback);
+}
+
+static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
+ struct z_erofs_vle_work *work __maybe_unused)
+{
+ erofs_workgroup_put(&grp->obj);
+}
+
+void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
+{
+ struct z_erofs_vle_workgroup *grp =
+ z_erofs_vle_work_workgroup(work, true);
+
+ __z_erofs_vle_work_release(grp, work);
+}
+
+static inline bool
+z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
+{
+ struct z_erofs_vle_work *work = builder->work;
+
+ if (work == NULL)
+ return false;
+
+ z_erofs_pagevec_ctor_exit(&builder->vector, false);
+ mutex_unlock(&work->lock);
+
+ /*
+ * if all pending pages are added, don't hold work reference
+ * any longer if the current work isn't hosted by ourselves.
+ */
+ if (!builder->hosted)
+ __z_erofs_vle_work_release(builder->grp, work);
+
+ builder->work = NULL;
+ builder->grp = NULL;
+ return true;
+}
+
+static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
+ gfp_t gfp)
+{
+ struct page *page = erofs_allocpage(pagepool, gfp);
+
+ if (unlikely(page == NULL))
+ return NULL;
+
+ page->mapping = Z_EROFS_MAPPING_STAGING;
+ return page;
+}
+
+struct z_erofs_vle_frontend {
+ struct inode *const inode;
+
+ struct z_erofs_vle_work_builder builder;
+ struct erofs_map_blocks_iter m_iter;
+
+ z_erofs_vle_owned_workgrp_t owned_head;
+
+ bool initial;
+#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
+ erofs_off_t cachedzone_la;
+#endif
+};
+
+#define VLE_FRONTEND_INIT(__i) { \
+ .inode = __i, \
+ .m_iter = { \
+ { .m_llen = 0, .m_plen = 0 }, \
+ .mpage = NULL \
+ }, \
+ .builder = VLE_WORK_BUILDER_INIT(), \
+ .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
+ .initial = true, }
+
+static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
+ struct page *page,
+ struct list_head *page_pool)
+{
+ struct super_block *const sb = fe->inode->i_sb;
+ struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
+ struct erofs_map_blocks_iter *const m = &fe->m_iter;
+ struct erofs_map_blocks *const map = &m->map;
+ struct z_erofs_vle_work_builder *const builder = &fe->builder;
+ const loff_t offset = page_offset(page);
+
+ bool tight = builder_is_followed(builder);
+ struct z_erofs_vle_work *work = builder->work;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ struct address_space *const mngda = sbi->managed_cache->i_mapping;
+ struct z_erofs_vle_workgroup *grp;
+ bool noio_outoforder;
+#endif
+
+ enum z_erofs_page_type page_type;
+ unsigned cur, end, spiltted, index;
+ int err;
+
+ /* register locked file pages as online pages in pack */
+ z_erofs_onlinepage_init(page);
+
+ spiltted = 0;
+ end = PAGE_SIZE;
+repeat:
+ cur = end - 1;
+
+ /* lucky, within the range of the current map_blocks */
+ if (offset + cur >= map->m_la &&
+ offset + cur < map->m_la + map->m_llen)
+ goto hitted;
+
+ /* go ahead the next map_blocks */
+ debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
+
+ if (!z_erofs_vle_work_iter_end(builder))
+ fe->initial = false;
+
+ map->m_la = offset + cur;
+ map->m_llen = 0;
+ err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0);
+ if (unlikely(err))
+ goto err_out;
+
+ /* deal with hole (FIXME! broken now) */
+ if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
+ goto hitted;
+
+ DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
+ BUG_ON(erofs_blkoff(map->m_pa));
+
+ err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
+ if (unlikely(err))
+ goto err_out;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ grp = fe->builder.grp;
+
+ /* let's do out-of-order decompression for noio */
+ noio_outoforder = grab_managed_cache_pages(mngda,
+ erofs_blknr(map->m_pa),
+ grp->compressed_pages, erofs_blknr(map->m_plen),
+ /* compressed page caching selection strategy */
+ fe->initial | (EROFS_FS_ZIP_CACHE_LVL >= 2 ?
+ map->m_la < fe->cachedzone_la : 0));
+
+ if (noio_outoforder && builder_is_followed(builder))
+ builder->role = Z_EROFS_VLE_WORK_PRIMARY;
+#endif
+
+ tight &= builder_is_followed(builder);
+ work = builder->work;
+hitted:
+ cur = end - min_t(unsigned, offset + end - map->m_la, end);
+ if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
+ zero_user_segment(page, cur, end);
+ goto next_part;
+ }
+
+ /* let's derive page type */
+ page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
+ (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+ (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+ Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
+
+retry:
+ err = z_erofs_vle_work_add_page(builder, page, page_type);
+ /* should allocate an additional staging page for pagevec */
+ if (err == -EAGAIN) {
+ struct page *const newpage =
+ __stagingpage_alloc(page_pool, GFP_NOFS);
+
+ err = z_erofs_vle_work_add_page(builder,
+ newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE);
+ if (!err)
+ goto retry;
+ }
+
+ if (unlikely(err))
+ goto err_out;
+
+ index = page->index - map->m_la / PAGE_SIZE;
+
+ /* FIXME! avoid the last relundant fixup & endio */
+ z_erofs_onlinepage_fixup(page, index, true);
+ ++spiltted;
+
+ /* also update nr_pages and increase queued_pages */
+ work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
+next_part:
+ /* can be used for verification */
+ map->m_llen = offset + cur - map->m_la;
+
+ end = cur;
+ if (end > 0)
+ goto repeat;
+
+ /* FIXME! avoid the last relundant fixup & endio */
+ z_erofs_onlinepage_endio(page);
+
+ debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
+ __func__, page, spiltted, map->m_llen);
+ return 0;
+
+err_out:
+ /* TODO: the missing error handing cases */
+ return err;
+}
+
+static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
+{
+ tagptr1_t t = tagptr_init(tagptr1_t, ptr);
+ struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
+ bool background = tagptr_unfold_tags(t);
+
+ if (atomic_add_return(bios, &io->pending_bios))
+ return;
+
+ if (background)
+ queue_work(z_erofs_workqueue, &io->u.work);
+ else
+ wake_up(&io->u.wait);
+}
+
+static inline void z_erofs_vle_read_endio(struct bio *bio)
+{
+ const blk_status_t err = bio->bi_status;
+ unsigned i;
+ struct bio_vec *bvec;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ struct address_space *mngda = NULL;
+#endif
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+ bool cachemngd = false;
+
+ DBG_BUGON(PageUptodate(page));
+ BUG_ON(page->mapping == NULL);
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ if (unlikely(mngda == NULL && !z_erofs_is_stagingpage(page))) {
+ struct inode *const inode = page->mapping->host;
+ struct super_block *const sb = inode->i_sb;
+
+ mngda = EROFS_SB(sb)->managed_cache->i_mapping;
+ }
+
+ /*
+ * If mngda has not gotten, it equals NULL,
+ * however, page->mapping never be NULL if working properly.
+ */
+ cachemngd = (page->mapping == mngda);
+#endif
+
+ if (unlikely(err))
+ SetPageError(page);
+ else if (cachemngd)
+ SetPageUptodate(page);
+
+ if (cachemngd)
+ unlock_page(page);
+ }
+
+ z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
+ bio_put(bio);
+}
+
+static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
+static DEFINE_MUTEX(z_pagemap_global_lock);
+
+static int z_erofs_vle_unzip(struct super_block *sb,
+ struct z_erofs_vle_workgroup *grp,
+ struct list_head *page_pool)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(sb);
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ struct address_space *const mngda = sbi->managed_cache->i_mapping;
+#endif
+ const unsigned clusterpages = erofs_clusterpages(sbi);
+
+ struct z_erofs_pagevec_ctor ctor;
+ unsigned nr_pages;
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ unsigned sparsemem_pages = 0;
+#endif
+ struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
+ struct page **pages, **compressed_pages, *page;
+ unsigned i, llen;
+
+ enum z_erofs_page_type page_type;
+ bool overlapped;
+ struct z_erofs_vle_work *work;
+ void *vout;
+ int err;
+
+ might_sleep();
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ work = z_erofs_vle_grab_primary_work(grp);
+#else
+ BUG();
+#endif
+ BUG_ON(!READ_ONCE(work->nr_pages));
+
+ mutex_lock(&work->lock);
+ nr_pages = work->nr_pages;
+
+ if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
+ pages = pages_onstack;
+ else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
+ mutex_trylock(&z_pagemap_global_lock))
+ pages = z_pagemap_global;
+ else {
+repeat:
+ pages = kvmalloc_array(nr_pages,
+ sizeof(struct page *), GFP_KERNEL);
+
+ /* fallback to global pagemap for the lowmem scenario */
+ if (unlikely(pages == NULL)) {
+ if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
+ goto repeat;
+ else {
+ mutex_lock(&z_pagemap_global_lock);
+ pages = z_pagemap_global;
+ }
+ }
+ }
+
+ for (i = 0; i < nr_pages; ++i)
+ pages[i] = NULL;
+
+ z_erofs_pagevec_ctor_init(&ctor,
+ Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 0);
+
+ for (i = 0; i < work->vcnt; ++i) {
+ unsigned pagenr;
+
+ page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);
+
+ /* all pages in pagevec ought to be valid */
+ DBG_BUGON(page == NULL);
+ DBG_BUGON(page->mapping == NULL);
+
+ if (z_erofs_gather_if_stagingpage(page_pool, page))
+ continue;
+
+ if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
+ pagenr = 0;
+ else
+ pagenr = z_erofs_onlinepage_index(page);
+
+ BUG_ON(pagenr >= nr_pages);
+
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ BUG_ON(pages[pagenr] != NULL);
+ ++sparsemem_pages;
+#endif
+ pages[pagenr] = page;
+ }
+
+ z_erofs_pagevec_ctor_exit(&ctor, true);
+
+ overlapped = false;
+ compressed_pages = grp->compressed_pages;
+
+ for (i = 0; i < clusterpages; ++i) {
+ unsigned pagenr;
+
+ page = compressed_pages[i];
+
+ /* all compressed pages ought to be valid */
+ DBG_BUGON(page == NULL);
+ DBG_BUGON(page->mapping == NULL);
+
+ if (z_erofs_is_stagingpage(page))
+ continue;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ else if (page->mapping == mngda) {
+ BUG_ON(PageLocked(page));
+ BUG_ON(!PageUptodate(page));
+ continue;
+ }
+#endif
+
+ /* only non-head page could be reused as a compressed page */
+ pagenr = z_erofs_onlinepage_index(page);
+
+ BUG_ON(pagenr >= nr_pages);
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ BUG_ON(pages[pagenr] != NULL);
+ ++sparsemem_pages;
+#endif
+ pages[pagenr] = page;
+
+ overlapped = true;
+ }
+
+ llen = (nr_pages << PAGE_SHIFT) - work->pageofs;
+
+ if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
+ /* FIXME! this should be fixed in the future */
+ BUG_ON(grp->llen != llen);
+
+ err = z_erofs_vle_plain_copy(compressed_pages, clusterpages,
+ pages, nr_pages, work->pageofs);
+ goto out;
+ }
+
+ if (llen > grp->llen)
+ llen = grp->llen;
+
+ err = z_erofs_vle_unzip_fast_percpu(compressed_pages,
+ clusterpages, pages, llen, work->pageofs,
+ z_erofs_onlinepage_endio);
+ if (err != -ENOTSUPP)
+ goto out_percpu;
+
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+ if (sparsemem_pages >= nr_pages) {
+ BUG_ON(sparsemem_pages > nr_pages);
+ goto skip_allocpage;
+ }
+#endif
+
+ for (i = 0; i < nr_pages; ++i) {
+ if (pages[i] != NULL)
+ continue;
+
+ pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS);
+ }
+
+#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
+skip_allocpage:
+#endif
+ vout = erofs_vmap(pages, nr_pages);
+
+ err = z_erofs_vle_unzip_vmap(compressed_pages,
+ clusterpages, vout, llen, work->pageofs, overlapped);
+
+ erofs_vunmap(vout, nr_pages);
+
+out:
+ for (i = 0; i < nr_pages; ++i) {
+ page = pages[i];
+ DBG_BUGON(page->mapping == NULL);
+
+ /* recycle all individual staging pages */
+ if (z_erofs_gather_if_stagingpage(page_pool, page))
+ continue;
+
+ if (unlikely(err < 0))
+ SetPageError(page);
+
+ z_erofs_onlinepage_endio(page);
+ }
+
+out_percpu:
+ for (i = 0; i < clusterpages; ++i) {
+ page = compressed_pages[i];
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ if (page->mapping == mngda)
+ continue;
+#endif
+ /* recycle all individual staging pages */
+ (void)z_erofs_gather_if_stagingpage(page_pool, page);
+
+ WRITE_ONCE(compressed_pages[i], NULL);
+ }
+
+ if (pages == z_pagemap_global)
+ mutex_unlock(&z_pagemap_global_lock);
+ else if (unlikely(pages != pages_onstack))
+ kvfree(pages);
+
+ work->nr_pages = 0;
+ work->vcnt = 0;
+
+ /* all work locks MUST be taken before the following line */
+
+ WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);
+
+ /* all work locks SHOULD be released right now */
+ mutex_unlock(&work->lock);
+
+ z_erofs_vle_work_release(work);
+ return err;
+}
+
+static void z_erofs_vle_unzip_all(struct super_block *sb,
+ struct z_erofs_vle_unzip_io *io,
+ struct list_head *page_pool)
+{
+ z_erofs_vle_owned_workgrp_t owned = io->head;
+
+ while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
+ struct z_erofs_vle_workgroup *grp;
+
+ /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
+ DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);
+
+ /* no possible that 'owned' equals NULL */
+ DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);
+
+ grp = owned;
+ owned = READ_ONCE(grp->next);
+
+ z_erofs_vle_unzip(sb, grp, page_pool);
+ }
+}
+
+static void z_erofs_vle_unzip_wq(struct work_struct *work)
+{
+ struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
+ struct z_erofs_vle_unzip_io_sb, io.u.work);
+ LIST_HEAD(page_pool);
+
+ BUG_ON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+ z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);
+
+ put_pages_list(&page_pool);
+ kvfree(iosb);
+}
+
+static inline struct z_erofs_vle_unzip_io *
+prepare_io_handler(struct super_block *sb,
+ struct z_erofs_vle_unzip_io *io,
+ bool background)
+{
+ struct z_erofs_vle_unzip_io_sb *iosb;
+
+ if (!background) {
+ /* waitqueue available for foreground io */
+ BUG_ON(io == NULL);
+
+ init_waitqueue_head(&io->u.wait);
+ atomic_set(&io->pending_bios, 0);
+ goto out;
+ }
+
+ if (io != NULL)
+ BUG();
+ else {
+ /* allocate extra io descriptor for background io */
+ iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb),
+ GFP_KERNEL | __GFP_NOFAIL);
+ BUG_ON(iosb == NULL);
+
+ io = &iosb->io;
+ }
+
+ iosb->sb = sb;
+ INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
+out:
+ io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
+ return io;
+}
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+/* true - unlocked (noio), false - locked (need submit io) */
+static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp,
+ struct page *page)
+{
+ wait_on_page_locked(page);
+ if (PagePrivate(page) && PageUptodate(page))
+ return true;
+
+ lock_page(page);
+ if (unlikely(!PagePrivate(page))) {
+ set_page_private(page, (unsigned long)grp);
+ SetPagePrivate(page);
+ }
+ if (unlikely(PageUptodate(page))) {
+ unlock_page(page);
+ return true;
+ }
+ return false;
+}
+
+#define __FSIO_1 1
+#else
+#define __FSIO_1 0
+#endif
+
+static bool z_erofs_vle_submit_all(struct super_block *sb,
+ z_erofs_vle_owned_workgrp_t owned_head,
+ struct list_head *pagepool,
+ struct z_erofs_vle_unzip_io *fg_io,
+ bool force_fg)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ const unsigned clusterpages = erofs_clusterpages(sbi);
+ const gfp_t gfp = GFP_NOFS;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ struct address_space *const mngda = sbi->managed_cache->i_mapping;
+ struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL;
+#endif
+ struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1];
+ struct bio *bio;
+ tagptr1_t bi_private;
+ /* since bio will be NULL, no need to initialize last_index */
+ pgoff_t uninitialized_var(last_index);
+ bool force_submit = false;
+ unsigned nr_bios;
+
+ if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
+ return false;
+
+ /*
+ * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io
+ * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io
+ */
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ ios[0] = prepare_io_handler(sb, fg_io + 0, false);
+#endif
+
+ if (force_fg) {
+ ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false);
+ bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0);
+ } else {
+ ios[__FSIO_1] = prepare_io_handler(sb, NULL, true);
+ bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1);
+ }
+
+ nr_bios = 0;
+ force_submit = false;
+ bio = NULL;
+
+ /* by default, all need io submission */
+ ios[__FSIO_1]->head = owned_head;
+
+ do {
+ struct z_erofs_vle_workgroup *grp;
+ struct page **compressed_pages, *oldpage, *page;
+ pgoff_t first_index;
+ unsigned i = 0;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ unsigned int noio = 0;
+ bool cachemngd;
+#endif
+ int err;
+
+ /* no possible 'owned_head' equals the following */
+ DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+ DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);
+
+ grp = owned_head;
+
+ /* close the main owned chain at first */
+ owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
+ Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+ first_index = grp->obj.index;
+ compressed_pages = grp->compressed_pages;
+
+ force_submit |= (first_index != last_index + 1);
+repeat:
+ /* fulfill all compressed pages */
+ oldpage = page = READ_ONCE(compressed_pages[i]);
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ cachemngd = false;
+
+ if (page == EROFS_UNALLOCATED_CACHED_PAGE) {
+ cachemngd = true;
+ goto do_allocpage;
+ } else if (page != NULL) {
+ if (page->mapping != mngda)
+ BUG_ON(PageUptodate(page));
+ else if (recover_managed_page(grp, page)) {
+ /* page is uptodate, skip io submission */
+ force_submit = true;
+ ++noio;
+ goto skippage;
+ }
+ } else {
+do_allocpage:
+#else
+ if (page != NULL)
+ BUG_ON(PageUptodate(page));
+ else {
+#endif
+ page = __stagingpage_alloc(pagepool, gfp);
+
+ if (oldpage != cmpxchg(compressed_pages + i,
+ oldpage, page)) {
+ list_add(&page->lru, pagepool);
+ goto repeat;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ } else if (cachemngd && !add_to_page_cache_lru(page,
+ mngda, first_index + i, gfp)) {
+ set_page_private(page, (unsigned long)grp);
+ SetPagePrivate(page);
+#endif
+ }
+ }
+
+ if (bio != NULL && force_submit) {
+submit_bio_retry:
+ __submit_bio(bio, REQ_OP_READ, 0);
+ bio = NULL;
+ }
+
+ if (bio == NULL) {
+ bio = prepare_bio(sb, first_index + i,
+ BIO_MAX_PAGES, z_erofs_vle_read_endio);
+ bio->bi_private = tagptr_cast_ptr(bi_private);
+
+ ++nr_bios;
+ }
+
+ err = bio_add_page(bio, page, PAGE_SIZE, 0);
+ if (err < PAGE_SIZE)
+ goto submit_bio_retry;
+
+ force_submit = false;
+ last_index = first_index + i;
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+skippage:
+#endif
+ if (++i < clusterpages)
+ goto repeat;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ if (noio < clusterpages) {
+ lstgrp_io = grp;
+ } else {
+ z_erofs_vle_owned_workgrp_t iogrp_next =
+ owned_head == Z_EROFS_VLE_WORKGRP_TAIL ?
+ Z_EROFS_VLE_WORKGRP_TAIL_CLOSED :
+ owned_head;
+
+ if (lstgrp_io == NULL)
+ ios[1]->head = iogrp_next;
+ else
+ WRITE_ONCE(lstgrp_io->next, iogrp_next);
+
+ if (lstgrp_noio == NULL)
+ ios[0]->head = grp;
+ else
+ WRITE_ONCE(lstgrp_noio->next, grp);
+
+ lstgrp_noio = grp;
+ }
+#endif
+ } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);
+
+ if (bio != NULL)
+ __submit_bio(bio, REQ_OP_READ, 0);
+
+#ifndef EROFS_FS_HAS_MANAGED_CACHE
+ BUG_ON(!nr_bios);
+#else
+ if (lstgrp_noio != NULL)
+ WRITE_ONCE(lstgrp_noio->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+ if (!force_fg && !nr_bios) {
+ kvfree(container_of(ios[1],
+ struct z_erofs_vle_unzip_io_sb, io));
+ return true;
+ }
+#endif
+
+ z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios);
+ return true;
+}
+
+static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
+ struct list_head *pagepool,
+ bool force_fg)
+{
+ struct super_block *sb = f->inode->i_sb;
+ struct z_erofs_vle_unzip_io io[1 + __FSIO_1];
+
+ if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
+ return;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ z_erofs_vle_unzip_all(sb, &io[0], pagepool);
+#endif
+ if (!force_fg)
+ return;
+
+ /* wait until all bios are completed */
+ wait_event(io[__FSIO_1].u.wait,
+ !atomic_read(&io[__FSIO_1].pending_bios));
+
+ /* let's synchronous decompression */
+ z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool);
+}
+
+static int z_erofs_vle_normalaccess_readpage(struct file *file,
+ struct page *page)
+{
+ struct inode *const inode = page->mapping->host;
+ struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
+ int err;
+ LIST_HEAD(pagepool);
+
+#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
+ f.cachedzone_la = page->index << PAGE_SHIFT;
+#endif
+ err = z_erofs_do_read_page(&f, page, &pagepool);
+ (void)z_erofs_vle_work_iter_end(&f.builder);
+
+ if (err) {
+ errln("%s, failed to read, err [%d]", __func__, err);
+ goto out;
+ }
+
+ z_erofs_submit_and_unzip(&f, &pagepool, true);
+out:
+ if (f.m_iter.mpage != NULL)
+ put_page(f.m_iter.mpage);
+
+ /* clean up the remaining free pages */
+ put_pages_list(&pagepool);
+ return 0;
+}
+
+static inline int __z_erofs_vle_normalaccess_readpages(
+ struct file *filp,
+ struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages, bool sync)
+{
+ struct inode *const inode = mapping->host;
+
+ struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
+ gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+ struct page *head = NULL;
+ LIST_HEAD(pagepool);
+
+#if (EROFS_FS_ZIP_CACHE_LVL >= 2)
+ f.cachedzone_la = lru_to_page(pages)->index << PAGE_SHIFT;
+#endif
+ for (; nr_pages; --nr_pages) {
+ struct page *page = lru_to_page(pages);
+
+ prefetchw(&page->flags);
+ list_del(&page->lru);
+
+ if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+ list_add(&page->lru, &pagepool);
+ continue;
+ }
+
+ BUG_ON(PagePrivate(page));
+ set_page_private(page, (unsigned long)head);
+ head = page;
+ }
+
+ while (head != NULL) {
+ struct page *page = head;
+ int err;
+
+ /* traversal in reverse order */
+ head = (void *)page_private(page);
+
+ err = z_erofs_do_read_page(&f, page, &pagepool);
+ if (err) {
+ struct erofs_vnode *vi = EROFS_V(inode);
+
+ errln("%s, readahead error at page %lu of nid %llu",
+ __func__, page->index, vi->nid);
+ }
+
+ put_page(page);
+ }
+
+ (void)z_erofs_vle_work_iter_end(&f.builder);
+
+ z_erofs_submit_and_unzip(&f, &pagepool, sync);
+
+ if (f.m_iter.mpage != NULL)
+ put_page(f.m_iter.mpage);
+
+ /* clean up the remaining free pages */
+ put_pages_list(&pagepool);
+ return 0;
+}
+
+static int z_erofs_vle_normalaccess_readpages(
+ struct file *filp,
+ struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ return __z_erofs_vle_normalaccess_readpages(filp,
+ mapping, pages, nr_pages,
+ nr_pages < 4 /* sync */);
+}
+
+const struct address_space_operations z_erofs_vle_normalaccess_aops = {
+ .readpage = z_erofs_vle_normalaccess_readpage,
+ .readpages = z_erofs_vle_normalaccess_readpages,
+};
+
+#define __vle_cluster_advise(x, bit, bits) \
+ ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1))
+
+#define __vle_cluster_type(advise) __vle_cluster_advise(advise, \
+ Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS)
+
+enum {
+ Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
+ Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
+ Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
+ Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
+ Z_EROFS_VLE_CLUSTER_TYPE_MAX
+};
+
+#define vle_cluster_type(di) \
+ __vle_cluster_type((di)->di_advise)
+
+static inline unsigned
+vle_compressed_index_clusterofs(unsigned clustersize,
+ struct z_erofs_vle_decompressed_index *di)
+{
+ debugln("%s, vle=%pK, advise=%x (type %u), clusterofs=%x blkaddr=%x",
+ __func__, di, di->di_advise, vle_cluster_type(di),
+ di->di_clusterofs, di->di_u.blkaddr);
+
+ switch (vle_cluster_type(di)) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ return di->di_clusterofs;
+ default:
+ BUG_ON(1);
+ }
+ return clustersize;
+}
+
+static inline erofs_blk_t
+vle_extent_blkaddr(struct inode *inode, pgoff_t index)
+{
+ struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+ struct erofs_vnode *vi = EROFS_V(inode);
+
+ unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
+ vi->xattr_isize) + sizeof(struct erofs_extent_header) +
+ index * sizeof(struct z_erofs_vle_decompressed_index);
+
+ return erofs_blknr(iloc(sbi, vi->nid) + ofs);
+}
+
+static inline unsigned int
+vle_extent_blkoff(struct inode *inode, pgoff_t index)
+{
+ struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+ struct erofs_vnode *vi = EROFS_V(inode);
+
+ unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
+ vi->xattr_isize) + sizeof(struct erofs_extent_header) +
+ index * sizeof(struct z_erofs_vle_decompressed_index);
+
+ return erofs_blkoff(iloc(sbi, vi->nid) + ofs);
+}
+
+/*
+ * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode
+ * ---
+ * VLE compression mode attempts to compress a number of logical data into
+ * a physical cluster with a fixed size.
+ * VLE compression mode uses "struct z_erofs_vle_decompressed_index".
+ */
+static erofs_off_t vle_get_logical_extent_head(
+ struct inode *inode,
+ struct page **page_iter,
+ void **kaddr_iter,
+ unsigned lcn, /* logical cluster number */
+ erofs_blk_t *pcn,
+ unsigned *flags)
+{
+ /* for extent meta */
+ struct page *page = *page_iter;
+ erofs_blk_t blkaddr = vle_extent_blkaddr(inode, lcn);
+ struct z_erofs_vle_decompressed_index *di;
+ unsigned long long ofs;
+ const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
+ const unsigned int clustersize = 1 << clusterbits;
+
+ if (page->index != blkaddr) {
+ kunmap_atomic(*kaddr_iter);
+ unlock_page(page);
+ put_page(page);
+
+ *page_iter = page = erofs_get_meta_page(inode->i_sb,
+ blkaddr, false);
+ *kaddr_iter = kmap_atomic(page);
+ }
+
+ di = *kaddr_iter + vle_extent_blkoff(inode, lcn);
+ switch (vle_cluster_type(di)) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ BUG_ON(!di->di_u.delta[0]);
+ BUG_ON(lcn < di->di_u.delta[0]);
+
+ ofs = vle_get_logical_extent_head(inode,
+ page_iter, kaddr_iter,
+ lcn - di->di_u.delta[0], pcn, flags);
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ *flags ^= EROFS_MAP_ZIPPED;
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ /* clustersize should be a power of two */
+ ofs = ((unsigned long long)lcn << clusterbits) +
+ (le16_to_cpu(di->di_clusterofs) & (clustersize - 1));
+ *pcn = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ default:
+ BUG_ON(1);
+ }
+ return ofs;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ struct page **mpage_ret, int flags)
+{
+ /* logicial extent (start, end) offset */
+ unsigned long long ofs, end;
+ struct z_erofs_vle_decompressed_index *di;
+ erofs_blk_t e_blkaddr, pcn;
+ unsigned lcn, logical_cluster_ofs, cluster_type;
+ u32 ofs_rem;
+ struct page *mpage = *mpage_ret;
+ void *kaddr;
+ bool initial;
+ const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
+ const unsigned int clustersize = 1 << clusterbits;
+ int err = 0;
+
+ /* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc... */
+ initial = !map->m_llen;
+
+ /* when trying to read beyond EOF, leave it unmapped */
+ if (unlikely(map->m_la >= inode->i_size)) {
+ BUG_ON(!initial);
+ map->m_llen = map->m_la + 1 - inode->i_size;
+ map->m_la = inode->i_size - 1;
+ map->m_flags = 0;
+ goto out;
+ }
+
+ debugln("%s, m_la %llu m_llen %llu --- start", __func__,
+ map->m_la, map->m_llen);
+
+ ofs = map->m_la + map->m_llen;
+
+ /* clustersize should be power of two */
+ lcn = ofs >> clusterbits;
+ ofs_rem = ofs & (clustersize - 1);
+
+ e_blkaddr = vle_extent_blkaddr(inode, lcn);
+
+ if (mpage == NULL || mpage->index != e_blkaddr) {
+ if (mpage != NULL)
+ put_page(mpage);
+
+ mpage = erofs_get_meta_page(inode->i_sb, e_blkaddr, false);
+ *mpage_ret = mpage;
+ } else {
+ lock_page(mpage);
+ DBG_BUGON(!PageUptodate(mpage));
+ }
+
+ kaddr = kmap_atomic(mpage);
+ di = kaddr + vle_extent_blkoff(inode, lcn);
+
+ debugln("%s, lcn %u e_blkaddr %u e_blkoff %u", __func__, lcn,
+ e_blkaddr, vle_extent_blkoff(inode, lcn));
+
+ logical_cluster_ofs = vle_compressed_index_clusterofs(clustersize, di);
+ if (!initial) {
+ /* [walking mode] 'map' has been already initialized */
+ map->m_llen += logical_cluster_ofs;
+ goto unmap_out;
+ }
+
+ /* by default, compressed */
+ map->m_flags |= EROFS_MAP_ZIPPED;
+
+ end = (u64)(lcn + 1) * clustersize;
+
+ cluster_type = vle_cluster_type(di);
+
+ switch (cluster_type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ if (ofs_rem >= logical_cluster_ofs)
+ map->m_flags ^= EROFS_MAP_ZIPPED;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ if (ofs_rem == logical_cluster_ofs) {
+ pcn = le32_to_cpu(di->di_u.blkaddr);
+ goto exact_hitted;
+ }
+
+ if (ofs_rem > logical_cluster_ofs) {
+ ofs = lcn * clustersize | logical_cluster_ofs;
+ pcn = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ }
+
+ /* logical cluster number should be >= 1 */
+ if (unlikely(!lcn)) {
+ errln("invalid logical cluster 0 at nid %llu",
+ EROFS_V(inode)->nid);
+ err = -EIO;
+ goto unmap_out;
+ }
+ end = (lcn-- * clustersize) | logical_cluster_ofs;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ /* get the correspoinding first chunk */
+ ofs = vle_get_logical_extent_head(inode, mpage_ret,
+ &kaddr, lcn, &pcn, &map->m_flags);
+ mpage = *mpage_ret;
+ break;
+ default:
+ errln("unknown cluster type %u at offset %llu of nid %llu",
+ cluster_type, ofs, EROFS_V(inode)->nid);
+ err = -EIO;
+ goto unmap_out;
+ }
+
+ map->m_la = ofs;
+exact_hitted:
+ map->m_llen = end - ofs;
+ map->m_plen = clustersize;
+ map->m_pa = blknr_to_addr(pcn);
+ map->m_flags |= EROFS_MAP_MAPPED;
+unmap_out:
+ kunmap_atomic(kaddr);
+ unlock_page(mpage);
+out:
+ debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
+ __func__, map->m_la, map->m_pa,
+ map->m_llen, map->m_plen, map->m_flags);
+
+ /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
+ DBG_BUGON(err < 0);
+ return err;
+}
+
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
new file mode 100644
index 000000000000..393998500865
--- /dev/null
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * linux/drivers/staging/erofs/unzip_vle.h
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#ifndef __EROFS_FS_UNZIP_VLE_H
+#define __EROFS_FS_UNZIP_VLE_H
+
+#include "internal.h"
+#include "unzip_pagevec.h"
+
+/*
+ * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
+ * used for temporary allocated pages (via erofs_allocpage),
+ * in order to seperate those from NULL mapping (eg. truncated pages)
+ */
+#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
+
+#define z_erofs_is_stagingpage(page) \
+ ((page)->mapping == Z_EROFS_MAPPING_STAGING)
+
+static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
+ struct page *page)
+{
+ if (z_erofs_is_stagingpage(page)) {
+ list_add(&page->lru, page_pool);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Modifiable by initialization/destruction paths and read-only
+ * for everyone else.
+ *
+ */
+
+#define Z_EROFS_VLE_INLINE_PAGEVECS 3
+
+struct z_erofs_vle_work {
+ /* struct z_erofs_vle_work *left, *right; */
+
+#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
+ struct list_head list;
+
+ atomic_t refcount;
+#endif
+ struct mutex lock;
+
+ /* I: decompression offset in page */
+ unsigned short pageofs;
+ unsigned short nr_pages;
+
+ /* L: queued pages in pagevec[] */
+ unsigned vcnt;
+
+ union {
+ /* L: pagevec */
+ erofs_vtptr_t pagevec[Z_EROFS_VLE_INLINE_PAGEVECS];
+ struct rcu_head rcu;
+ };
+};
+
+#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0
+#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1
+#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1
+
+typedef struct z_erofs_vle_workgroup *z_erofs_vle_owned_workgrp_t;
+
+struct z_erofs_vle_workgroup {
+ struct erofs_workgroup obj;
+ struct z_erofs_vle_work work;
+
+ /* next owned workgroup */
+ z_erofs_vle_owned_workgrp_t next;
+
+ /* compressed pages (including multi-usage pages) */
+ struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
+ unsigned int llen, flags;
+};
+
+/* let's avoid the valid 32-bit kernel addresses */
+
+/* the chained workgroup has't submitted io (still open) */
+#define Z_EROFS_VLE_WORKGRP_TAIL ((void *)0x5F0ECAFE)
+/* the chained workgroup has already submitted io */
+#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD)
+
+#define Z_EROFS_VLE_WORKGRP_NIL (NULL)
+
+#define z_erofs_vle_workgrp_fmt(grp) \
+ ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK)
+
+static inline void z_erofs_vle_set_workgrp_fmt(
+ struct z_erofs_vle_workgroup *grp,
+ unsigned int fmt)
+{
+ grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK);
+}
+
+#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
+#error multiref decompression is unimplemented yet
+#else
+
+#define z_erofs_vle_grab_primary_work(grp) (&(grp)->work)
+#define z_erofs_vle_grab_work(grp, pageofs) (&(grp)->work)
+#define z_erofs_vle_work_workgroup(wrk, primary) \
+ ((primary) ? container_of(wrk, \
+ struct z_erofs_vle_workgroup, work) : \
+ ({ BUG(); (void *)NULL; }))
+
+#endif
+
+#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_vle_workgroup)
+
+struct z_erofs_vle_unzip_io {
+ atomic_t pending_bios;
+ z_erofs_vle_owned_workgrp_t head;
+
+ union {
+ wait_queue_head_t wait;
+ struct work_struct work;
+ } u;
+};
+
+struct z_erofs_vle_unzip_io_sb {
+ struct z_erofs_vle_unzip_io io;
+ struct super_block *sb;
+};
+
+#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
+#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
+#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
+
+/*
+ * waiters (aka. ongoing_packs): # to unlock the page
+ * sub-index: 0 - for partial page, >= 1 full page sub-index
+ */
+typedef atomic_t z_erofs_onlinepage_t;
+
+/* type punning */
+union z_erofs_onlinepage_converter {
+ z_erofs_onlinepage_t *o;
+ unsigned long *v;
+};
+
+static inline unsigned z_erofs_onlinepage_index(struct page *page)
+{
+ union z_erofs_onlinepage_converter u;
+
+ BUG_ON(!PagePrivate(page));
+ u.v = &page_private(page);
+
+ return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+}
+
+static inline void z_erofs_onlinepage_init(struct page *page)
+{
+ union {
+ z_erofs_onlinepage_t o;
+ unsigned long v;
+ /* keep from being unlocked in advance */
+ } u = { .o = ATOMIC_INIT(1) };
+
+ set_page_private(page, u.v);
+ smp_wmb();
+ SetPagePrivate(page);
+}
+
+static inline void z_erofs_onlinepage_fixup(struct page *page,
+ uintptr_t index, bool down)
+{
+ unsigned long *p, o, v, id;
+repeat:
+ p = &page_private(page);
+ o = READ_ONCE(*p);
+
+ id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+ if (id) {
+ if (!index)
+ return;
+
+ BUG_ON(id != index);
+ }
+
+ v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+ ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned)down);
+ if (cmpxchg(p, o, v) != o)
+ goto repeat;
+}
+
+static inline void z_erofs_onlinepage_endio(struct page *page)
+{
+ union z_erofs_onlinepage_converter u;
+ unsigned v;
+
+ BUG_ON(!PagePrivate(page));
+ u.v = &page_private(page);
+
+ v = atomic_dec_return(u.o);
+ if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+ ClearPagePrivate(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+
+ debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
+}
+
+#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \
+ min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
+#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
+
+/* unzip_vle_lz4.c */
+extern int z_erofs_vle_plain_copy(struct page **compressed_pages,
+ unsigned clusterpages, struct page **pages,
+ unsigned nr_pages, unsigned short pageofs);
+
+extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
+ unsigned clusterpages, struct page **pages,
+ unsigned outlen, unsigned short pageofs,
+ void (*endio)(struct page *));
+
+extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
+ unsigned clusterpages, void *vaddr, unsigned llen,
+ unsigned short pageofs, bool overlapped);
+
+#endif
+
diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c
new file mode 100644
index 000000000000..f5b665f15be5
--- /dev/null
+++ b/drivers/staging/erofs/unzip_vle_lz4.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/unzip_vle_lz4.c
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include "unzip_vle.h"
+
+#if Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_VLE_INLINE_PAGEVECS
+#define EROFS_PERCPU_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
+#else
+#define EROFS_PERCPU_NR_PAGES Z_EROFS_VLE_INLINE_PAGEVECS
+#endif
+
+static struct {
+ char data[PAGE_SIZE * EROFS_PERCPU_NR_PAGES];
+} erofs_pcpubuf[NR_CPUS];
+
+int z_erofs_vle_plain_copy(struct page **compressed_pages,
+ unsigned clusterpages,
+ struct page **pages,
+ unsigned nr_pages,
+ unsigned short pageofs)
+{
+ unsigned i, j;
+ void *src = NULL;
+ const unsigned righthalf = PAGE_SIZE - pageofs;
+ char *percpu_data;
+ bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 };
+
+ preempt_disable();
+ percpu_data = erofs_pcpubuf[smp_processor_id()].data;
+
+ j = 0;
+ for (i = 0; i < nr_pages; j = i++) {
+ struct page *page = pages[i];
+ void *dst;
+
+ if (page == NULL) {
+ if (src != NULL) {
+ if (!mirrored[j])
+ kunmap_atomic(src);
+ src = NULL;
+ }
+ continue;
+ }
+
+ dst = kmap_atomic(page);
+
+ for (; j < clusterpages; ++j) {
+ if (compressed_pages[j] != page)
+ continue;
+
+ BUG_ON(mirrored[j]);
+ memcpy(percpu_data + j * PAGE_SIZE, dst, PAGE_SIZE);
+ mirrored[j] = true;
+ break;
+ }
+
+ if (i) {
+ if (src == NULL)
+ src = mirrored[i-1] ?
+ percpu_data + (i-1) * PAGE_SIZE :
+ kmap_atomic(compressed_pages[i-1]);
+
+ memcpy(dst, src + righthalf, pageofs);
+
+ if (!mirrored[i-1])
+ kunmap_atomic(src);
+
+ if (unlikely(i >= clusterpages)) {
+ kunmap_atomic(dst);
+ break;
+ }
+ }
+
+ if (!righthalf)
+ src = NULL;
+ else {
+ src = mirrored[i] ? percpu_data + i * PAGE_SIZE :
+ kmap_atomic(compressed_pages[i]);
+
+ memcpy(dst + pageofs, src, righthalf);
+ }
+
+ kunmap_atomic(dst);
+ }
+
+ if (src != NULL && !mirrored[j])
+ kunmap_atomic(src);
+
+ preempt_enable();
+ return 0;
+}
+
+extern int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen);
+
+int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
+ unsigned clusterpages,
+ struct page **pages,
+ unsigned outlen,
+ unsigned short pageofs,
+ void (*endio)(struct page *))
+{
+ void *vin, *vout;
+ unsigned nr_pages, i, j;
+ int ret;
+
+ if (outlen + pageofs > EROFS_PERCPU_NR_PAGES * PAGE_SIZE)
+ return -ENOTSUPP;
+
+ nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE);
+
+ if (clusterpages == 1)
+ vin = kmap_atomic(compressed_pages[0]);
+ else
+ vin = erofs_vmap(compressed_pages, clusterpages);
+
+ preempt_disable();
+ vout = erofs_pcpubuf[smp_processor_id()].data;
+
+ ret = z_erofs_unzip_lz4(vin, vout + pageofs,
+ clusterpages * PAGE_SIZE, outlen);
+
+ if (ret >= 0) {
+ outlen = ret;
+ ret = 0;
+ }
+
+ for (i = 0; i < nr_pages; ++i) {
+ j = min((unsigned)PAGE_SIZE - pageofs, outlen);
+
+ if (pages[i] != NULL) {
+ if (ret < 0)
+ SetPageError(pages[i]);
+ else if (clusterpages == 1 && pages[i] == compressed_pages[0])
+ memcpy(vin + pageofs, vout + pageofs, j);
+ else {
+ void *dst = kmap_atomic(pages[i]);
+
+ memcpy(dst + pageofs, vout + pageofs, j);
+ kunmap_atomic(dst);
+ }
+ endio(pages[i]);
+ }
+ vout += PAGE_SIZE;
+ outlen -= j;
+ pageofs = 0;
+ }
+ preempt_enable();
+
+ if (clusterpages == 1)
+ kunmap_atomic(vin);
+ else
+ erofs_vunmap(vin, clusterpages);
+
+ return ret;
+}
+
+int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
+ unsigned clusterpages,
+ void *vout,
+ unsigned llen,
+ unsigned short pageofs,
+ bool overlapped)
+{
+ void *vin;
+ unsigned i;
+ int ret;
+
+ if (overlapped) {
+ preempt_disable();
+ vin = erofs_pcpubuf[smp_processor_id()].data;
+
+ for (i = 0; i < clusterpages; ++i) {
+ void *t = kmap_atomic(compressed_pages[i]);
+
+ memcpy(vin + PAGE_SIZE *i, t, PAGE_SIZE);
+ kunmap_atomic(t);
+ }
+ } else if (clusterpages == 1)
+ vin = kmap_atomic(compressed_pages[0]);
+ else {
+ vin = erofs_vmap(compressed_pages, clusterpages);
+ }
+
+ ret = z_erofs_unzip_lz4(vin, vout + pageofs,
+ clusterpages * PAGE_SIZE, llen);
+ if (ret > 0)
+ ret = 0;
+
+ if (!overlapped) {
+ if (clusterpages == 1)
+ kunmap_atomic(vin);
+ else {
+ erofs_vunmap(vin, clusterpages);
+ }
+ } else
+ preempt_enable();
+
+ return ret;
+}
+
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
new file mode 100644
index 000000000000..595cf90af9bb
--- /dev/null
+++ b/drivers/staging/erofs/utils.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/utils.c
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include "internal.h"
+#include <linux/pagevec.h>
+
+struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
+{
+ struct page *page;
+
+ if (!list_empty(pool)) {
+ page = lru_to_page(pool);
+ list_del(&page->lru);
+ } else {
+ page = alloc_pages(gfp | __GFP_NOFAIL, 0);
+
+ BUG_ON(page == NULL);
+ BUG_ON(page->mapping != NULL);
+ }
+ return page;
+}
+
+/* global shrink count (for all mounted EROFS instances) */
+static atomic_long_t erofs_global_shrink_cnt;
+
+#ifdef CONFIG_EROFS_FS_ZIP
+
+/* radix_tree and the future XArray both don't use tagptr_t yet */
+struct erofs_workgroup *erofs_find_workgroup(
+ struct super_block *sb, pgoff_t index, bool *tag)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ struct erofs_workgroup *grp;
+ int oldcount;
+
+repeat:
+ rcu_read_lock();
+ grp = radix_tree_lookup(&sbi->workstn_tree, index);
+ if (grp != NULL) {
+ *tag = radix_tree_exceptional_entry(grp);
+ grp = (void *)((unsigned long)grp &
+ ~RADIX_TREE_EXCEPTIONAL_ENTRY);
+
+ if (erofs_workgroup_get(grp, &oldcount)) {
+ /* prefer to relax rcu read side */
+ rcu_read_unlock();
+ goto repeat;
+ }
+
+ /* decrease refcount added by erofs_workgroup_put */
+ if (unlikely(oldcount == 1))
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ BUG_ON(index != grp->index);
+ }
+ rcu_read_unlock();
+ return grp;
+}
+
+int erofs_register_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp,
+ bool tag)
+{
+ struct erofs_sb_info *sbi;
+ int err;
+
+ /* grp->refcount should not < 1 */
+ BUG_ON(!atomic_read(&grp->refcount));
+
+ err = radix_tree_preload(GFP_NOFS);
+ if (err)
+ return err;
+
+ sbi = EROFS_SB(sb);
+ erofs_workstn_lock(sbi);
+
+ if (tag)
+ grp = (void *)((unsigned long)grp |
+ 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
+
+ err = radix_tree_insert(&sbi->workstn_tree,
+ grp->index, grp);
+
+ if (!err) {
+ __erofs_workgroup_get(grp);
+ }
+
+ erofs_workstn_unlock(sbi);
+ radix_tree_preload_end();
+ return err;
+}
+
+extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
+
+int erofs_workgroup_put(struct erofs_workgroup *grp)
+{
+ int count = atomic_dec_return(&grp->refcount);
+
+ if (count == 1)
+ atomic_long_inc(&erofs_global_shrink_cnt);
+ else if (!count) {
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ erofs_workgroup_free_rcu(grp);
+ }
+ return count;
+}
+
+unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+ unsigned long nr_shrink,
+ bool cleanup)
+{
+ pgoff_t first_index = 0;
+ void *batch[PAGEVEC_SIZE];
+ unsigned freed = 0;
+
+ int i, found;
+repeat:
+ erofs_workstn_lock(sbi);
+
+ found = radix_tree_gang_lookup(&sbi->workstn_tree,
+ batch, first_index, PAGEVEC_SIZE);
+
+ for (i = 0; i < found; ++i) {
+ int cnt;
+ struct erofs_workgroup *grp = (void *)
+ ((unsigned long)batch[i] &
+ ~RADIX_TREE_EXCEPTIONAL_ENTRY);
+
+ first_index = grp->index + 1;
+
+ cnt = atomic_read(&grp->refcount);
+ BUG_ON(cnt <= 0);
+
+ if (cleanup)
+ BUG_ON(cnt != 1);
+
+#ifndef EROFS_FS_HAS_MANAGED_CACHE
+ else if (cnt > 1)
+#else
+ if (!erofs_workgroup_try_to_freeze(grp, 1))
+#endif
+ continue;
+
+ if (radix_tree_delete(&sbi->workstn_tree,
+ grp->index) != grp) {
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+skip:
+ erofs_workgroup_unfreeze(grp, 1);
+#endif
+ continue;
+ }
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ if (erofs_try_to_free_all_cached_pages(sbi, grp))
+ goto skip;
+
+ erofs_workgroup_unfreeze(grp, 1);
+#endif
+ /* (rarely) grabbed again when freeing */
+ erofs_workgroup_put(grp);
+
+ ++freed;
+ if (unlikely(!--nr_shrink))
+ break;
+ }
+ erofs_workstn_unlock(sbi);
+
+ if (i && nr_shrink)
+ goto repeat;
+ return freed;
+}
+
+#endif
+
+/* protected by 'erofs_sb_list_lock' */
+static unsigned int shrinker_run_no;
+
+/* protects the mounted 'erofs_sb_list' */
+static DEFINE_SPINLOCK(erofs_sb_list_lock);
+static LIST_HEAD(erofs_sb_list);
+
+void erofs_register_super(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ mutex_init(&sbi->umount_mutex);
+
+ spin_lock(&erofs_sb_list_lock);
+ list_add(&sbi->list, &erofs_sb_list);
+ spin_unlock(&erofs_sb_list_lock);
+}
+
+void erofs_unregister_super(struct super_block *sb)
+{
+ spin_lock(&erofs_sb_list_lock);
+ list_del(&EROFS_SB(sb)->list);
+ spin_unlock(&erofs_sb_list_lock);
+}
+
+unsigned long erofs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ return atomic_long_read(&erofs_global_shrink_cnt);
+}
+
+unsigned long erofs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct erofs_sb_info *sbi;
+ struct list_head *p;
+
+ unsigned long nr = sc->nr_to_scan;
+ unsigned int run_no;
+ unsigned long freed = 0;
+
+ spin_lock(&erofs_sb_list_lock);
+ do
+ run_no = ++shrinker_run_no;
+ while (run_no == 0);
+
+ /* Iterate over all mounted superblocks and try to shrink them */
+ p = erofs_sb_list.next;
+ while (p != &erofs_sb_list) {
+ sbi = list_entry(p, struct erofs_sb_info, list);
+
+ /*
+ * We move the ones we do to the end of the list, so we stop
+ * when we see one we have already done.
+ */
+ if (sbi->shrinker_run_no == run_no)
+ break;
+
+ if (!mutex_trylock(&sbi->umount_mutex)) {
+ p = p->next;
+ continue;
+ }
+
+ spin_unlock(&erofs_sb_list_lock);
+ sbi->shrinker_run_no = run_no;
+
+#ifdef CONFIG_EROFS_FS_ZIP
+ freed += erofs_shrink_workstation(sbi, nr, false);
+#endif
+
+ spin_lock(&erofs_sb_list_lock);
+ /* Get the next list element before we move this one */
+ p = p->next;
+
+ /*
+ * Move this one to the end of the list to provide some
+ * fairness.
+ */
+ list_move_tail(&sbi->list, &erofs_sb_list);
+ mutex_unlock(&sbi->umount_mutex);
+
+ if (freed >= nr)
+ break;
+ }
+ spin_unlock(&erofs_sb_list_lock);
+ return freed;
+}
+
diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c
new file mode 100644
index 000000000000..0e9cfeccdf99
--- /dev/null
+++ b/drivers/staging/erofs/xattr.c
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/xattr.c
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#include <linux/security.h>
+#include "xattr.h"
+
+struct xattr_iter {
+ struct super_block *sb;
+ struct page *page;
+ void *kaddr;
+
+ erofs_blk_t blkaddr;
+ unsigned ofs;
+};
+
+static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
+{
+ /* only init_inode_xattrs use non-atomic once */
+ if (unlikely(!atomic))
+ kunmap(it->page);
+ else
+ kunmap_atomic(it->kaddr);
+ unlock_page(it->page);
+ put_page(it->page);
+}
+
+static void init_inode_xattrs(struct inode *inode)
+{
+ struct xattr_iter it;
+ unsigned i;
+ struct erofs_xattr_ibody_header *ih;
+ struct erofs_sb_info *sbi;
+ struct erofs_vnode *vi;
+ bool atomic_map;
+
+ if (likely(inode_has_inited_xattr(inode)))
+ return;
+
+ vi = EROFS_V(inode);
+ BUG_ON(!vi->xattr_isize);
+
+ sbi = EROFS_I_SB(inode);
+ it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
+ it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
+
+ it.page = erofs_get_inline_page(inode, it.blkaddr);
+ BUG_ON(IS_ERR(it.page));
+
+ /* read in shared xattr array (non-atomic, see kmalloc below) */
+ it.kaddr = kmap(it.page);
+ atomic_map = false;
+
+ ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
+
+ vi->xattr_shared_count = ih->h_shared_count;
+ vi->xattr_shared_xattrs = (unsigned *)kmalloc_array(
+ vi->xattr_shared_count, sizeof(unsigned),
+ GFP_KERNEL | __GFP_NOFAIL);
+
+ /* let's skip ibody header */
+ it.ofs += sizeof(struct erofs_xattr_ibody_header);
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ if (unlikely(it.ofs >= EROFS_BLKSIZ)) {
+ /* cannot be unaligned */
+ BUG_ON(it.ofs != EROFS_BLKSIZ);
+ xattr_iter_end(&it, atomic_map);
+
+ it.page = erofs_get_meta_page(inode->i_sb,
+ ++it.blkaddr, S_ISDIR(inode->i_mode));
+ BUG_ON(IS_ERR(it.page));
+
+ it.kaddr = kmap_atomic(it.page);
+ atomic_map = true;
+ it.ofs = 0;
+ }
+ vi->xattr_shared_xattrs[i] =
+ le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
+ it.ofs += sizeof(__le32);
+ }
+ xattr_iter_end(&it, atomic_map);
+
+ inode_set_inited_xattr(inode);
+}
+
+struct xattr_iter_handlers {
+ int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *);
+ int (*name)(struct xattr_iter *, unsigned, char *, unsigned);
+ int (*alloc_buffer)(struct xattr_iter *, unsigned);
+ void (*value)(struct xattr_iter *, unsigned, char *, unsigned);
+};
+
+static void xattr_iter_fixup(struct xattr_iter *it)
+{
+ if (unlikely(it->ofs >= EROFS_BLKSIZ)) {
+ xattr_iter_end(it, true);
+
+ it->blkaddr += erofs_blknr(it->ofs);
+ it->page = erofs_get_meta_page(it->sb, it->blkaddr, false);
+ BUG_ON(IS_ERR(it->page));
+
+ it->kaddr = kmap_atomic(it->page);
+ it->ofs = erofs_blkoff(it->ofs);
+ }
+}
+
+static int inline_xattr_iter_begin(struct xattr_iter *it,
+ struct inode *inode)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
+ unsigned xattr_header_sz, inline_xattr_ofs;
+
+ xattr_header_sz = inlinexattr_header_size(inode);
+ if (unlikely(xattr_header_sz >= vi->xattr_isize)) {
+ BUG_ON(xattr_header_sz > vi->xattr_isize);
+ return -ENOATTR;
+ }
+
+ inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
+
+ it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
+ it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
+
+ it->page = erofs_get_inline_page(inode, it->blkaddr);
+ BUG_ON(IS_ERR(it->page));
+ it->kaddr = kmap_atomic(it->page);
+
+ return vi->xattr_isize - xattr_header_sz;
+}
+
+static int xattr_foreach(struct xattr_iter *it,
+ struct xattr_iter_handlers *op, unsigned *tlimit)
+{
+ struct erofs_xattr_entry entry;
+ unsigned value_sz, processed, slice;
+ int err;
+
+ /* 0. fixup blkaddr, ofs, ipage */
+ xattr_iter_fixup(it);
+
+ /*
+ * 1. read xattr entry to the memory,
+ * since we do EROFS_XATTR_ALIGN
+ * therefore entry should be in the page
+ */
+ entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
+ if (tlimit != NULL) {
+ unsigned entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry);
+
+ BUG_ON(*tlimit < entry_sz);
+ *tlimit -= entry_sz;
+ }
+
+ it->ofs += sizeof(struct erofs_xattr_entry);
+ value_sz = le16_to_cpu(entry.e_value_size);
+
+ /* handle entry */
+ err = op->entry(it, &entry);
+ if (err) {
+ it->ofs += entry.e_name_len + value_sz;
+ goto out;
+ }
+
+ /* 2. handle xattr name (ofs will finally be at the end of name) */
+ processed = 0;
+
+ while (processed < entry.e_name_len) {
+ if (it->ofs >= EROFS_BLKSIZ) {
+ BUG_ON(it->ofs > EROFS_BLKSIZ);
+
+ xattr_iter_fixup(it);
+ it->ofs = 0;
+ }
+
+ slice = min_t(unsigned, PAGE_SIZE - it->ofs,
+ entry.e_name_len - processed);
+
+ /* handle name */
+ err = op->name(it, processed, it->kaddr + it->ofs, slice);
+ if (err) {
+ it->ofs += entry.e_name_len - processed + value_sz;
+ goto out;
+ }
+
+ it->ofs += slice;
+ processed += slice;
+ }
+
+ /* 3. handle xattr value */
+ processed = 0;
+
+ if (op->alloc_buffer != NULL) {
+ err = op->alloc_buffer(it, value_sz);
+ if (err) {
+ it->ofs += value_sz;
+ goto out;
+ }
+ }
+
+ while (processed < value_sz) {
+ if (it->ofs >= EROFS_BLKSIZ) {
+ BUG_ON(it->ofs > EROFS_BLKSIZ);
+ xattr_iter_fixup(it);
+ it->ofs = 0;
+ }
+
+ slice = min_t(unsigned, PAGE_SIZE - it->ofs,
+ value_sz - processed);
+ op->value(it, processed, it->kaddr + it->ofs, slice);
+ it->ofs += slice;
+ processed += slice;
+ }
+
+out:
+ /* we assume that ofs is aligned with 4 bytes */
+ it->ofs = EROFS_XATTR_ALIGN(it->ofs);
+ return err;
+}
+
+struct getxattr_iter {
+ struct xattr_iter it;
+
+ char *buffer;
+ int buffer_size, index;
+ struct qstr name;
+};
+
+static int xattr_entrymatch(struct xattr_iter *_it,
+ struct erofs_xattr_entry *entry)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ return (it->index != entry->e_name_index ||
+ it->name.len != entry->e_name_len) ? -ENOATTR : 0;
+}
+
+static int xattr_namematch(struct xattr_iter *_it,
+ unsigned processed, char *buf, unsigned len)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0;
+}
+
+static int xattr_checkbuffer(struct xattr_iter *_it,
+ unsigned value_sz)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+ int err = it->buffer_size < value_sz ? -ERANGE : 0;
+
+ it->buffer_size = value_sz;
+ return it->buffer == NULL ? 1 : err;
+}
+
+static void xattr_copyvalue(struct xattr_iter *_it,
+ unsigned processed, char *buf, unsigned len)
+{
+ struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+ memcpy(it->buffer + processed, buf, len);
+}
+
+static struct xattr_iter_handlers find_xattr_handlers = {
+ .entry = xattr_entrymatch,
+ .name = xattr_namematch,
+ .alloc_buffer = xattr_checkbuffer,
+ .value = xattr_copyvalue
+};
+
+static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
+{
+ int ret;
+ unsigned remaining;
+
+ ret = inline_xattr_iter_begin(&it->it, inode);
+ if (ret < 0)
+ return ret;
+
+ remaining = ret;
+ while (remaining) {
+ ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
+ if (ret >= 0)
+ break;
+ }
+ xattr_iter_end(&it->it, true);
+
+ return ret < 0 ? ret : it->buffer_size;
+}
+
+static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
+ unsigned i;
+ int ret = -ENOATTR;
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ erofs_blk_t blkaddr =
+ xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
+
+ it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
+
+ if (!i || blkaddr != it->it.blkaddr) {
+ if (i)
+ xattr_iter_end(&it->it, true);
+
+ it->it.page = erofs_get_meta_page(inode->i_sb,
+ blkaddr, false);
+ BUG_ON(IS_ERR(it->it.page));
+ it->it.kaddr = kmap_atomic(it->it.page);
+ it->it.blkaddr = blkaddr;
+ }
+
+ ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
+ if (ret >= 0)
+ break;
+ }
+ if (vi->xattr_shared_count)
+ xattr_iter_end(&it->it, true);
+
+ return ret < 0 ? ret : it->buffer_size;
+}
+
+static bool erofs_xattr_user_list(struct dentry *dentry)
+{
+ return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER);
+}
+
+static bool erofs_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
+}
+
+int erofs_getxattr(struct inode *inode, int index,
+ const char *name,
+ void *buffer, size_t buffer_size)
+{
+ int ret;
+ struct getxattr_iter it;
+
+ if (unlikely(name == NULL))
+ return -EINVAL;
+
+ init_inode_xattrs(inode);
+
+ it.index = index;
+
+ it.name.len = strlen(name);
+ if (it.name.len > EROFS_NAME_LEN)
+ return -ERANGE;
+ it.name.name = name;
+
+ it.buffer = buffer;
+ it.buffer_size = buffer_size;
+
+ it.it.sb = inode->i_sb;
+ ret = inline_getxattr(inode, &it);
+ if (ret == -ENOATTR)
+ ret = shared_getxattr(inode, &it);
+ return ret;
+}
+
+static int erofs_xattr_generic_get(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+
+ switch (handler->flags) {
+ case EROFS_XATTR_INDEX_USER:
+ if (!test_opt(sbi, XATTR_USER))
+ return -EOPNOTSUPP;
+ break;
+ case EROFS_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ break;
+ case EROFS_XATTR_INDEX_SECURITY:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!vi->xattr_isize)
+ return -ENOATTR;
+
+ return erofs_getxattr(inode, handler->flags, name, buffer, size);
+}
+
+const struct xattr_handler erofs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .flags = EROFS_XATTR_INDEX_USER,
+ .list = erofs_xattr_user_list,
+ .get = erofs_xattr_generic_get,
+};
+
+const struct xattr_handler erofs_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = EROFS_XATTR_INDEX_TRUSTED,
+ .list = erofs_xattr_trusted_list,
+ .get = erofs_xattr_generic_get,
+};
+
+#ifdef CONFIG_EROFS_FS_SECURITY
+const struct xattr_handler __maybe_unused erofs_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = EROFS_XATTR_INDEX_SECURITY,
+ .get = erofs_xattr_generic_get,
+};
+#endif
+
+const struct xattr_handler *erofs_xattr_handlers[] = {
+ &erofs_xattr_user_handler,
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &erofs_xattr_trusted_handler,
+#ifdef CONFIG_EROFS_FS_SECURITY
+ &erofs_xattr_security_handler,
+#endif
+ NULL,
+};
+
+struct listxattr_iter {
+ struct xattr_iter it;
+
+ struct dentry *dentry;
+ char *buffer;
+ int buffer_size, buffer_ofs;
+};
+
+static int xattr_entrylist(struct xattr_iter *_it,
+ struct erofs_xattr_entry *entry)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+ unsigned prefix_len;
+ const char *prefix;
+
+ const struct xattr_handler *h =
+ erofs_xattr_handler(entry->e_name_index);
+
+ if (h == NULL || (h->list != NULL && !h->list(it->dentry)))
+ return 1;
+
+ /* Note that at least one of 'prefix' and 'name' should be non-NULL */
+ prefix = h->prefix != NULL ? h->prefix : h->name;
+ prefix_len = strlen(prefix);
+
+ if (it->buffer == NULL) {
+ it->buffer_ofs += prefix_len + entry->e_name_len + 1;
+ return 1;
+ }
+
+ if (it->buffer_ofs + prefix_len
+ + entry->e_name_len + 1 > it->buffer_size)
+ return -ERANGE;
+
+ memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
+ it->buffer_ofs += prefix_len;
+ return 0;
+}
+
+static int xattr_namelist(struct xattr_iter *_it,
+ unsigned processed, char *buf, unsigned len)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+
+ memcpy(it->buffer + it->buffer_ofs, buf, len);
+ it->buffer_ofs += len;
+ return 0;
+}
+
+static int xattr_skipvalue(struct xattr_iter *_it,
+ unsigned value_sz)
+{
+ struct listxattr_iter *it =
+ container_of(_it, struct listxattr_iter, it);
+
+ it->buffer[it->buffer_ofs++] = '\0';
+ return 1;
+}
+
+static struct xattr_iter_handlers list_xattr_handlers = {
+ .entry = xattr_entrylist,
+ .name = xattr_namelist,
+ .alloc_buffer = xattr_skipvalue,
+ .value = NULL
+};
+
+static int inline_listxattr(struct listxattr_iter *it)
+{
+ int ret;
+ unsigned remaining;
+
+ ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry));
+ if (ret < 0)
+ return ret;
+
+ remaining = ret;
+ while (remaining) {
+ ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
+ if (ret < 0)
+ break;
+ }
+ xattr_iter_end(&it->it, true);
+ return ret < 0 ? ret : it->buffer_ofs;
+}
+
+static int shared_listxattr(struct listxattr_iter *it)
+{
+ struct inode *const inode = d_inode(it->dentry);
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+ unsigned i;
+ int ret = 0;
+
+ for (i = 0; i < vi->xattr_shared_count; ++i) {
+ erofs_blk_t blkaddr =
+ xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
+
+ it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
+ if (!i || blkaddr != it->it.blkaddr) {
+ if (i)
+ xattr_iter_end(&it->it, true);
+
+ it->it.page = erofs_get_meta_page(inode->i_sb,
+ blkaddr, false);
+ BUG_ON(IS_ERR(it->it.page));
+ it->it.kaddr = kmap_atomic(it->it.page);
+ it->it.blkaddr = blkaddr;
+ }
+
+ ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
+ if (ret < 0)
+ break;
+ }
+ if (vi->xattr_shared_count)
+ xattr_iter_end(&it->it, true);
+
+ return ret < 0 ? ret : it->buffer_ofs;
+}
+
+ssize_t erofs_listxattr(struct dentry *dentry,
+ char *buffer, size_t buffer_size)
+{
+ int ret;
+ struct listxattr_iter it;
+
+ init_inode_xattrs(d_inode(dentry));
+
+ it.dentry = dentry;
+ it.buffer = buffer;
+ it.buffer_size = buffer_size;
+ it.buffer_ofs = 0;
+
+ it.it.sb = dentry->d_sb;
+
+ ret = inline_listxattr(&it);
+ if (ret < 0 && ret != -ENOATTR)
+ return ret;
+ return shared_listxattr(&it);
+}
+
diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h
new file mode 100644
index 000000000000..0c7379282fc5
--- /dev/null
+++ b/drivers/staging/erofs/xattr.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * linux/drivers/staging/erofs/xattr.h
+ *
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+#ifndef __EROFS_XATTR_H
+#define __EROFS_XATTR_H
+
+#include "internal.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+/* Attribute not found */
+#define ENOATTR ENODATA
+
+static inline unsigned inlinexattr_header_size(struct inode *inode)
+{
+ return sizeof(struct erofs_xattr_ibody_header)
+ + sizeof(u32) * EROFS_V(inode)->xattr_shared_count;
+}
+
+static inline erofs_blk_t
+xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id)
+{
+#ifdef CONFIG_EROFS_FS_XATTR
+ return sbi->xattr_blkaddr +
+ xattr_id * sizeof(__u32) / EROFS_BLKSIZ;
+#else
+ return 0;
+#endif
+}
+
+static inline unsigned
+xattrblock_offset(struct erofs_sb_info *sbi, unsigned xattr_id)
+{
+ return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ;
+}
+
+extern const struct xattr_handler erofs_xattr_user_handler;
+extern const struct xattr_handler erofs_xattr_trusted_handler;
+#ifdef CONFIG_EROFS_FS_SECURITY
+extern const struct xattr_handler erofs_xattr_security_handler;
+#endif
+
+static inline const struct xattr_handler *erofs_xattr_handler(unsigned index)
+{
+static const struct xattr_handler *xattr_handler_map[] = {
+ [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+ [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
+ [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] =
+ &posix_acl_default_xattr_handler,
+#endif
+ [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
+#ifdef CONFIG_EROFS_FS_SECURITY
+ [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
+#endif
+};
+ return index && index < ARRAY_SIZE(xattr_handler_map) ?
+ xattr_handler_map[index] : NULL;
+}
+
+#ifdef CONFIG_EROFS_FS_XATTR
+
+extern const struct inode_operations erofs_generic_xattr_iops;
+extern const struct inode_operations erofs_dir_xattr_iops;
+
+int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
+ssize_t erofs_listxattr(struct dentry *, char *, size_t);
+#else
+static int __maybe_unused erofs_getxattr(struct inode *inode, int index,
+ const char *name,
+ void *buffer, size_t buffer_size)
+{
+ return -ENOTSUPP;
+}
+
+static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry,
+ char *buffer, size_t buffer_size)
+{
+ return -ENOTSUPP;
+}
+#endif
+
+#endif
+