Diffstat (limited to 'drivers/staging/erofs')
-rw-r--r--  drivers/staging/erofs/Kconfig                       |  141
-rw-r--r--  drivers/staging/erofs/Makefile                      |   13
-rw-r--r--  drivers/staging/erofs/TODO                          |   45
-rw-r--r--  drivers/staging/erofs/data.c                        |  385
-rw-r--r--  drivers/staging/erofs/dir.c                         |  145
-rw-r--r--  drivers/staging/erofs/erofs_fs.h                    |  266
-rw-r--r--  drivers/staging/erofs/include/linux/tagptr.h        |  110
-rw-r--r--  drivers/staging/erofs/include/trace/events/erofs.h  |  240
-rw-r--r--  drivers/staging/erofs/inode.c                       |  283
-rw-r--r--  drivers/staging/erofs/internal.h                    |  556
-rw-r--r--  drivers/staging/erofs/lz4defs.h                     |  227
-rw-r--r--  drivers/staging/erofs/namei.c                       |  251
-rw-r--r--  drivers/staging/erofs/super.c                       |  649
-rw-r--r--  drivers/staging/erofs/unzip_lz4.c                   |  251
-rw-r--r--  drivers/staging/erofs/unzip_pagevec.h               |  172
-rw-r--r--  drivers/staging/erofs/unzip_vle.c                   | 1656
-rw-r--r--  drivers/staging/erofs/unzip_vle.h                   |  239
-rw-r--r--  drivers/staging/erofs/unzip_vle_lz4.c               |  209
-rw-r--r--  drivers/staging/erofs/utils.c                       |  271
-rw-r--r--  drivers/staging/erofs/xattr.c                       |  577
-rw-r--r--  drivers/staging/erofs/xattr.h                       |   93
21 files changed, 6779 insertions, 0 deletions
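For reviewers skimming the diffstat, the core of the uncompressed read path added below boils down to two address computations: internal.h maps an inode number (nid) to its on-disk byte offset relative to meta_blkaddr via iloc()/blknr_to_addr(), and data.c's erofs_map_blocks_flatmode() maps a logical file offset to a physical address starting at the inode's raw_blkaddr. The stand-alone sketch below mirrors those formulas outside the kernel; the concrete constants (4KiB blocks, 32-byte v1 inode slots) are illustrative assumptions, since the real values are read from the superblock in super.c, which is not reproduced in this excerpt.

/*
 * Illustrative sketch only -- not part of the patch. It mirrors the
 * address arithmetic of iloc()/blknr_to_addr() in internal.h and
 * erofs_map_blocks_flatmode() in data.c under assumed parameters:
 * 4KiB blocks (LOG_BLOCK_SIZE = 12) and 32-byte v1 inode slots
 * (islotbits = 5).
 */
#include <stdint.h>
#include <stdio.h>

#define LOG_BLOCK_SIZE	12			/* assumed: block size == 4KiB page */
#define EROFS_BLKSIZ	(1ULL << LOG_BLOCK_SIZE)

static uint64_t blknr_to_addr(uint32_t nr)
{
	return (uint64_t)nr * EROFS_BLKSIZ;	/* block number -> byte address */
}

/* byte offset of the on-disk inode @nid, cf. iloc() in internal.h */
static uint64_t iloc(uint32_t meta_blkaddr, uint64_t nid, unsigned int islotbits)
{
	return blknr_to_addr(meta_blkaddr) + (nid << islotbits);
}

/* flat-mode data mapping, cf. erofs_map_blocks_flatmode() in data.c */
static uint64_t flatmode_pa(uint32_t raw_blkaddr, uint64_t file_offset)
{
	return blknr_to_addr(raw_blkaddr) + file_offset;
}

int main(void)
{
	/* hypothetical layout: metadata area at block 2, file data at block 100 */
	printf("inode nid 16 lives at byte %llu\n",
	       (unsigned long long)iloc(2, 16, 5));
	printf("file offset 8192 maps to byte %llu\n",
	       (unsigned long long)flatmode_pa(100, 8192));
	return 0;
}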
diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig new file mode 100644 index 000000000000..96f614934df1 --- /dev/null +++ b/drivers/staging/erofs/Kconfig @@ -0,0 +1,141 @@ +# SPDX-License-Identifier: GPL-2.0 + +config EROFS_FS + tristate "EROFS filesystem support" + depends on BROKEN + help + EROFS(Enhanced Read-Only File System) is a lightweight + read-only file system with modern designs (eg. page-sized + blocks, inline xattrs/data, etc.) for scenarios which need + high-performance read-only requirements, eg. firmwares in + mobile phone or LIVECDs. + + It also provides VLE compression support, focusing on + random read improvements, keeping relatively lower + compression ratios, which is useful for high-performance + devices with limited memory and ROM space. + + If unsure, say N. + +config EROFS_FS_DEBUG + bool "EROFS debugging feature" + depends on EROFS_FS + help + Print EROFS debugging messages and enable more BUG_ONs + which check the filesystem consistency aggressively. + + For daily use, say N. + +config EROFS_FS_XATTR + bool "EROFS extended attributes" + depends on EROFS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + +config EROFS_FS_POSIX_ACL + bool "EROFS Access Control Lists" + depends on EROFS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N. + +config EROFS_FS_SECURITY + bool "EROFS Security Labels" + depends on EROFS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the erofs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. + +config EROFS_FS_USE_VM_MAP_RAM + bool "EROFS VM_MAP_RAM Support" + depends on EROFS_FS + help + use vm_map_ram/vm_unmap_ram instead of vmap/vunmap. + + If you don't know what these are, say N. + +config EROFS_FAULT_INJECTION + bool "EROFS fault injection facility" + depends on EROFS_FS + help + Test EROFS to inject faults such as ENOMEM, EIO, and so on. + If unsure, say N. + +config EROFS_FS_ZIP + bool "EROFS Data Compresssion Support" + depends on EROFS_FS + help + Currently we support VLE Compression only. + Play at your own risk. + + If you don't want to use compression feature, say N. + +config EROFS_FS_CLUSTER_PAGE_LIMIT + int "EROFS Cluster Pages Hard Limit" + depends on EROFS_FS_ZIP + range 1 256 + default "1" + help + Indicates VLE compressed pages hard limit of a + compressed cluster. + + For example, if files of a image are compressed + into 8k-unit, the hard limit should not be less + than 2. Otherwise, the image cannot be mounted + correctly on this kernel. + +choice + prompt "EROFS VLE Data Decompression mode" + depends on EROFS_FS_ZIP + default EROFS_FS_ZIP_CACHE_BIPOLAR + help + EROFS supports three options for VLE decompression. + "In-place Decompression Only" consumes the minimum memory + with lowest random read. 
+ + "Bipolar Cached Decompression" consumes the maximum memory + with highest random read. + + If unsure, select "Bipolar Cached Decompression" + +config EROFS_FS_ZIP_NO_CACHE + bool "In-place Decompression Only" + help + Read compressed data into page cache and do in-place + decompression directly. + +config EROFS_FS_ZIP_CACHE_UNIPOLAR + bool "Unipolar Cached Decompression" + help + For each request, it caches the last compressed page + for further reading. + It still decompresses in place for the rest compressed pages. + +config EROFS_FS_ZIP_CACHE_BIPOLAR + bool "Bipolar Cached Decompression" + help + For each request, it caches the both end compressed pages + for further reading. + It still decompresses in place for the rest compressed pages. + + Recommended for performance priority. + +endchoice + diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile new file mode 100644 index 000000000000..9a766eb7ed75 --- /dev/null +++ b/drivers/staging/erofs/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +EROFS_VERSION = "1.0pre1" + +ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\" + +obj-$(CONFIG_EROFS_FS) += erofs.o +# staging requirement: to be self-contained in its own directory +ccflags-y += -I$(src)/include +erofs-objs := super.o inode.o data.o namei.o dir.o utils.o +erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o +erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_lz4.o unzip_vle_lz4.o + diff --git a/drivers/staging/erofs/TODO b/drivers/staging/erofs/TODO new file mode 100644 index 000000000000..f99ddb842f99 --- /dev/null +++ b/drivers/staging/erofs/TODO @@ -0,0 +1,45 @@ + +EROFS is still working in progress, thus it is not suitable +for all productive uses. play at your own risk :) + +TODO List: + - add the missing error handling code + (mainly existed in xattr and decompression submodules); + + - finalize erofs ondisk format design (which means that + minor on-disk revisions could happen later); + + - documentation and detailed technical analysis; + + - general code review and clean up + (including confusing variable names and code snippets); + + - support larger compressed clustersizes for selection + (currently erofs only works as expected with the page-sized + compressed cluster configuration, usually 4KB); + + - support more lossless data compression algorithms + in addition to LZ4 algorithms in VLE approach; + + - data deduplication and other useful features. + +erofs-mkfs (preview version) binaries for i386 / x86_64 are available at: + + https://github.com/hsiangkao/erofs_mkfs_binary + +It is still in progress opening mkfs source code to public, +in any case an open-source mkfs will be released in the near future. + + +Code, suggestions, etc, are welcome. Please feel free to +ask and send patches, + +To: + linux-erofs mailing list <linux-erofs@lists.ozlabs.org> + Gao Xiang <gaoxiang25@huawei.com> + Chao Yu <yuchao0@huawei.com> + +Cc: (for linux-kernel upstream patches) + Greg Kroah-Hartman <gregkh@linuxfoundation.org> + linux-staging mailing list <devel@driverdev.osuosl.org> + diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c new file mode 100644 index 000000000000..ac263a180253 --- /dev/null +++ b/drivers/staging/erofs/data.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/data.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. 
+ * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include "internal.h" +#include <linux/prefetch.h> + +#include <trace/events/erofs.h> + +static inline void read_endio(struct bio *bio) +{ + int i; + struct bio_vec *bvec; + const blk_status_t err = bio->bi_status; + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + /* page is already locked */ + BUG_ON(PageUptodate(page)); + + if (unlikely(err)) + SetPageError(page); + else + SetPageUptodate(page); + + unlock_page(page); + /* page could be reclaimed now */ + } + bio_put(bio); +} + +/* prio -- true is used for dir */ +struct page *erofs_get_meta_page(struct super_block *sb, + erofs_blk_t blkaddr, bool prio) +{ + struct inode *bd_inode = sb->s_bdev->bd_inode; + struct address_space *mapping = bd_inode->i_mapping; + struct page *page; + +repeat: + page = find_or_create_page(mapping, blkaddr, + /* + * Prefer looping in the allocator rather than here, + * at least that code knows what it's doing. + */ + mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL); + + BUG_ON(!page || !PageLocked(page)); + + if (!PageUptodate(page)) { + struct bio *bio; + int err; + + bio = prepare_bio(sb, blkaddr, 1, read_endio); + err = bio_add_page(bio, page, PAGE_SIZE, 0); + BUG_ON(err != PAGE_SIZE); + + __submit_bio(bio, REQ_OP_READ, + REQ_META | (prio ? REQ_PRIO : 0)); + + lock_page(page); + + /* the page has been truncated by others? */ + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + put_page(page); + goto repeat; + } + + /* more likely a read error */ + if (unlikely(!PageUptodate(page))) { + unlock_page(page); + put_page(page); + + page = ERR_PTR(-EIO); + } + } + return page; +} + +static int erofs_map_blocks_flatmode(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + erofs_blk_t nblocks, lastblk; + u64 offset = map->m_la; + struct erofs_vnode *vi = EROFS_V(inode); + + trace_erofs_map_blocks_flatmode_enter(inode, map, flags); + BUG_ON(is_inode_layout_compression(inode)); + + nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); + lastblk = nblocks - is_inode_layout_inline(inode); + + if (unlikely(offset >= inode->i_size)) { + /* leave out-of-bound access unmapped */ + map->m_flags = 0; + map->m_plen = 0; + goto out; + } + + /* there is no hole in flatmode */ + map->m_flags = EROFS_MAP_MAPPED; + + if (offset < blknr_to_addr(lastblk)) { + map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; + map->m_plen = blknr_to_addr(lastblk) - offset; + } else if (is_inode_layout_inline(inode)) { + /* 2 - inode inline B: inode, [xattrs], inline last blk... 
*/ + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); + + map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize + + vi->xattr_isize + erofs_blkoff(map->m_la); + map->m_plen = inode->i_size - offset; + + /* inline data should locate in one meta block */ + BUG_ON(erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE); + map->m_flags |= EROFS_MAP_META; + } else { + errln("internal error @ nid: %llu (size %llu), m_la 0x%llx", + vi->nid, inode->i_size, map->m_la); + BUG(); + } + +out: + map->m_llen = map->m_plen; + trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); + return 0; +} + +#ifdef CONFIG_EROFS_FS_ZIP +extern int z_erofs_map_blocks_iter(struct inode *, + struct erofs_map_blocks *, struct page **, int); +#endif + +int erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + struct page **mpage_ret, int flags) +{ + /* by default, reading raw data never use erofs_map_blocks_iter */ + if (unlikely(!is_inode_layout_compression(inode))) { + if (*mpage_ret != NULL) + put_page(*mpage_ret); + *mpage_ret = NULL; + + return erofs_map_blocks(inode, map, flags); + } + +#ifdef CONFIG_EROFS_FS_ZIP + return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags); +#else + /* data compression is not available */ + return -ENOTSUPP; +#endif +} + +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags) +{ + if (unlikely(is_inode_layout_compression(inode))) { + struct page *mpage = NULL; + int err; + + err = erofs_map_blocks_iter(inode, map, &mpage, flags); + if (mpage != NULL) + put_page(mpage); + return err; + } + return erofs_map_blocks_flatmode(inode, map, flags); +} + +static inline struct bio *erofs_read_raw_page( + struct bio *bio, + struct address_space *mapping, + struct page *page, + erofs_off_t *last_block, + unsigned nblocks, + bool ra) +{ + struct inode *inode = mapping->host; + erofs_off_t current_block = (erofs_off_t)page->index; + int err; + + BUG_ON(!nblocks); + + if (PageUptodate(page)) { + err = 0; + goto has_updated; + } + + if (cleancache_get_page(page) == 0) { + err = 0; + SetPageUptodate(page); + goto has_updated; + } + + /* note that for readpage case, bio also equals to NULL */ + if (bio != NULL && + /* not continuous */ + *last_block + 1 != current_block) { +submit_bio_retry: + __submit_bio(bio, REQ_OP_READ, 0); + bio = NULL; + } + + if (bio == NULL) { + struct erofs_map_blocks map = { + .m_la = blknr_to_addr(current_block), + }; + erofs_blk_t blknr; + unsigned blkoff; + + err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (unlikely(err)) + goto err_out; + + /* zero out the holed page */ + if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) { + zero_user_segment(page, 0, PAGE_SIZE); + SetPageUptodate(page); + + /* imply err = 0, see erofs_map_blocks */ + goto has_updated; + } + + /* for RAW access mode, m_plen must be equal to m_llen */ + BUG_ON(map.m_plen != map.m_llen); + + blknr = erofs_blknr(map.m_pa); + blkoff = erofs_blkoff(map.m_pa); + + /* deal with inline page */ + if (map.m_flags & EROFS_MAP_META) { + void *vsrc, *vto; + struct page *ipage; + + BUG_ON(map.m_plen > PAGE_SIZE); + + ipage = erofs_get_meta_page(inode->i_sb, blknr, 0); + + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto err_out; + } + + vsrc = kmap_atomic(ipage); + vto = kmap_atomic(page); + memcpy(vto, vsrc + blkoff, map.m_plen); + memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen); + kunmap_atomic(vto); + kunmap_atomic(vsrc); + flush_dcache_page(page); + + SetPageUptodate(page); + /* TODO: could we unlock the page earlier? 
*/ + unlock_page(ipage); + put_page(ipage); + + /* imply err = 0, see erofs_map_blocks */ + goto has_updated; + } + + /* pa must be block-aligned for raw reading */ + BUG_ON(erofs_blkoff(map.m_pa) != 0); + + /* max # of continuous pages */ + if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE)) + nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE); + if (nblocks > BIO_MAX_PAGES) + nblocks = BIO_MAX_PAGES; + + bio = prepare_bio(inode->i_sb, blknr, nblocks, read_endio); + } + + err = bio_add_page(bio, page, PAGE_SIZE, 0); + /* out of the extent or bio is full */ + if (err < PAGE_SIZE) + goto submit_bio_retry; + + *last_block = current_block; + + /* shift in advance in case of it followed by too many gaps */ + if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) { + /* err should reassign to 0 after submitting */ + err = 0; + goto submit_bio_out; + } + + return bio; + +err_out: + /* for sync reading, set page error immediately */ + if (!ra) { + SetPageError(page); + ClearPageUptodate(page); + } +has_updated: + unlock_page(page); + + /* if updated manually, continuous pages has a gap */ + if (bio != NULL) +submit_bio_out: + __submit_bio(bio, REQ_OP_READ, 0); + + return unlikely(err) ? ERR_PTR(err) : NULL; +} + +/* + * since we dont have write or truncate flows, so no inode + * locking needs to be held at the moment. + */ +static int erofs_raw_access_readpage(struct file *file, struct page *page) +{ + erofs_off_t last_block; + struct bio *bio; + + trace_erofs_readpage(page, true); + + bio = erofs_read_raw_page(NULL, page->mapping, + page, &last_block, 1, false); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + BUG_ON(bio != NULL); /* since we have only one bio -- must be NULL */ + return 0; +} + +static int erofs_raw_access_readpages(struct file *filp, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + erofs_off_t last_block; + struct bio *bio = NULL; + gfp_t gfp = readahead_gfp_mask(mapping); + struct page *page = list_last_entry(pages, struct page, lru); + + trace_erofs_readpages(mapping->host, page, nr_pages, true); + + for (; nr_pages; --nr_pages) { + page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + + if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { + bio = erofs_read_raw_page(bio, mapping, page, + &last_block, nr_pages, true); + + /* all the page errors are ignored when readahead */ + if (IS_ERR(bio)) { + pr_err("%s, readahead error at page %lu of nid %llu\n", + __func__, page->index, + EROFS_V(mapping->host)->nid); + + bio = NULL; + } + } + + /* pages could still be locked */ + put_page(page); + } + BUG_ON(!list_empty(pages)); + + /* the rare case (end in gaps) */ + if (unlikely(bio != NULL)) + __submit_bio(bio, REQ_OP_READ, 0); + return 0; +} + +/* for uncompressed (aligned) files and raw access for other files */ +const struct address_space_operations erofs_raw_access_aops = { + .readpage = erofs_raw_access_readpage, + .readpages = erofs_raw_access_readpages, +}; + diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c new file mode 100644 index 000000000000..be6ae3b1bdbe --- /dev/null +++ b/drivers/staging/erofs/dir.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/dir.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include "internal.h" + +static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = { + [EROFS_FT_UNKNOWN] = DT_UNKNOWN, + [EROFS_FT_REG_FILE] = DT_REG, + [EROFS_FT_DIR] = DT_DIR, + [EROFS_FT_CHRDEV] = DT_CHR, + [EROFS_FT_BLKDEV] = DT_BLK, + [EROFS_FT_FIFO] = DT_FIFO, + [EROFS_FT_SOCK] = DT_SOCK, + [EROFS_FT_SYMLINK] = DT_LNK, +}; + +static int erofs_fill_dentries(struct dir_context *ctx, + void *dentry_blk, unsigned *ofs, + unsigned nameoff, unsigned maxsize) +{ + struct erofs_dirent *de = dentry_blk; + const struct erofs_dirent *end = dentry_blk + nameoff; + + de = dentry_blk + *ofs; + while (de < end) { + const char *de_name; + int de_namelen; + unsigned char d_type; +#ifdef CONFIG_EROFS_FS_DEBUG + unsigned dbg_namelen; + unsigned char dbg_namebuf[EROFS_NAME_LEN]; +#endif + + if (unlikely(de->file_type < EROFS_FT_MAX)) + d_type = erofs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + + nameoff = le16_to_cpu(de->nameoff); + de_name = (char *)dentry_blk + nameoff; + + de_namelen = unlikely(de + 1 >= end) ? + /* last directory entry */ + strnlen(de_name, maxsize - nameoff) : + le16_to_cpu(de[1].nameoff) - nameoff; + + /* the corrupted directory found */ + BUG_ON(de_namelen < 0); + +#ifdef CONFIG_EROFS_FS_DEBUG + dbg_namelen = min(EROFS_NAME_LEN - 1, de_namelen); + memcpy(dbg_namebuf, de_name, dbg_namelen); + dbg_namebuf[dbg_namelen] = '\0'; + + debugln("%s, found de_name %s de_len %d d_type %d", __func__, + dbg_namebuf, de_namelen, d_type); +#endif + + if (!dir_emit(ctx, de_name, de_namelen, + le64_to_cpu(de->nid), d_type)) + /* stoped by some reason */ + return 1; + ++de; + *ofs += sizeof(struct erofs_dirent); + } + *ofs = maxsize; + return 0; +} + +static int erofs_readdir(struct file *f, struct dir_context *ctx) +{ + struct inode *dir = file_inode(f); + struct address_space *mapping = dir->i_mapping; + const size_t dirsize = i_size_read(dir); + unsigned i = ctx->pos / EROFS_BLKSIZ; + unsigned ofs = ctx->pos % EROFS_BLKSIZ; + int err = 0; + bool initial = true; + + while (ctx->pos < dirsize) { + struct page *dentry_page; + struct erofs_dirent *de; + unsigned nameoff, maxsize; + + dentry_page = read_mapping_page(mapping, i, NULL); + if (IS_ERR(dentry_page)) + continue; + + lock_page(dentry_page); + de = (struct erofs_dirent *)kmap(dentry_page); + + nameoff = le16_to_cpu(de->nameoff); + + if (unlikely(nameoff < sizeof(struct erofs_dirent) || + nameoff >= PAGE_SIZE)) { + errln("%s, invalid de[0].nameoff %u", + __func__, nameoff); + + err = -EIO; + goto skip_this; + } + + maxsize = min_t(unsigned, dirsize - ctx->pos + ofs, PAGE_SIZE); + + /* search dirents at the arbitrary position */ + if (unlikely(initial)) { + initial = false; + + ofs = roundup(ofs, sizeof(struct erofs_dirent)); + if (unlikely(ofs >= nameoff)) + goto skip_this; + } + + err = erofs_fill_dentries(ctx, de, &ofs, nameoff, maxsize); +skip_this: + kunmap(dentry_page); + + unlock_page(dentry_page); + put_page(dentry_page); + + ctx->pos = blknr_to_addr(i) + ofs; + + if (unlikely(err)) + break; + ++i; + ofs = 0; + } + return err < 0 ? 
err : 0; +} + +const struct file_operations erofs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = erofs_readdir, +}; + diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h new file mode 100644 index 000000000000..2f8e2bf70941 --- /dev/null +++ b/drivers/staging/erofs/erofs_fs.h @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Apache-2.0 + * + * linux/drivers/staging/erofs/erofs_fs.h + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is dual-licensed; you may select either the GNU General Public + * License version 2 or Apache License, Version 2.0. See the file COPYING + * in the main directory of the Linux distribution for more details. + */ +#ifndef __EROFS_FS_H +#define __EROFS_FS_H + +/* Enhanced(Extended) ROM File System */ +#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 +#define EROFS_SUPER_OFFSET 1024 + +struct erofs_super_block { +/* 0 */__le32 magic; /* in the little endian */ +/* 4 */__le32 checksum; /* crc32c(super_block) */ +/* 8 */__le32 features; +/* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */ +/* 13 */__u8 reserved; + +/* 14 */__le16 root_nid; +/* 16 */__le64 inos; /* total valid ino # (== f_files - f_favail) */ + +/* 24 */__le64 build_time; /* inode v1 time derivation */ +/* 32 */__le32 build_time_nsec; +/* 36 */__le32 blocks; /* used for statfs */ +/* 40 */__le32 meta_blkaddr; +/* 44 */__le32 xattr_blkaddr; +/* 48 */__u8 uuid[16]; /* 128-bit uuid for volume */ +/* 64 */__u8 volume_name[16]; /* volume name */ + +/* 80 */__u8 reserved2[48]; /* 128 bytes */ +} __packed; + +#define __EROFS_BIT(_prefix, _cur, _pre) enum { \ + _prefix ## _cur ## _BIT = _prefix ## _pre ## _BIT + \ + _prefix ## _pre ## _BITS } + +/* + * erofs inode data mapping: + * 0 - inode plain without inline data A: + * inode, [xattrs], ... | ... | no-holed data + * 1 - inode VLE compression B: + * inode, [xattrs], extents ... | ... + * 2 - inode plain with inline data C: + * inode, [xattrs], last_inline_data, ... | ... 
| no-holed data + * 3~7 - reserved + */ +enum { + EROFS_INODE_LAYOUT_PLAIN, + EROFS_INODE_LAYOUT_COMPRESSION, + EROFS_INODE_LAYOUT_INLINE, + EROFS_INODE_LAYOUT_MAX +}; +#define EROFS_I_VERSION_BITS 1 +#define EROFS_I_DATA_MAPPING_BITS 3 + +#define EROFS_I_VERSION_BIT 0 +__EROFS_BIT(EROFS_I_, DATA_MAPPING, VERSION); + +struct erofs_inode_v1 { +/* 0 */__le16 i_advise; + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ +/* 2 */__le16 i_xattr_icount; +/* 4 */__le16 i_mode; +/* 6 */__le16 i_nlink; +/* 8 */__le32 i_size; +/* 12 */__le32 i_reserved; +/* 16 */union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + } i_u __packed; +/* 20 */__le32 i_ino; /* only used for 32-bit stat compatibility */ +/* 24 */__le16 i_uid; +/* 26 */__le16 i_gid; +/* 28 */__le32 i_checksum; +} __packed; + +/* 32 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_V1 0 +/* 64 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_V2 1 + +struct erofs_inode_v2 { + __le16 i_advise; + + /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_reserved; /* 8 bytes */ + __le64 i_size; /* 16 bytes */ + union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + } i_u __packed; + + /* only used for 32-bit stat compatibility */ + __le32 i_ino; /* 24 bytes */ + + __le32 i_uid; + __le32 i_gid; + __le64 i_ctime; /* 32 bytes */ + __le32 i_ctime_nsec; + __le32 i_nlink; + __u8 i_reserved2[12]; + __le32 i_checksum; /* 64 bytes */ +} __packed; + +#define EROFS_MAX_SHARED_XATTRS (128) +/* h_shared_count between 129 ... 255 are special # */ +#define EROFS_SHARED_XATTR_EXTENT (255) + +/* + * inline xattrs (n == i_xattr_icount): + * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes + * 12 bytes / \ + * / \ + * /-----------------------\ + * | erofs_xattr_entries+ | + * +-----------------------+ + * inline xattrs must starts in erofs_xattr_ibody_header, + * for read-only fs, no need to introduce h_refcount + */ +struct erofs_xattr_ibody_header { + __le32 h_checksum; + __u8 h_shared_count; + __u8 h_reserved[7]; + __le32 h_shared_xattrs[0]; /* shared xattr id array */ +} __packed; + +/* Name indexes */ +#define EROFS_XATTR_INDEX_USER 1 +#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EROFS_XATTR_INDEX_TRUSTED 4 +#define EROFS_XATTR_INDEX_LUSTRE 5 +#define EROFS_XATTR_INDEX_SECURITY 6 + +/* xattr entry (for both inline & shared xattrs) */ +struct erofs_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_size; /* size of attribute value */ + /* followed by e_name and e_value */ + char e_name[0]; /* attribute name */ +} __packed; + +#define ondisk_xattr_ibody_size(count) ({\ + u32 __count = le16_to_cpu(count); \ + ((__count) == 0) ? 
0 : \ + sizeof(struct erofs_xattr_ibody_header) + \ + sizeof(__u32) * ((__count) - 1); }) + +#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) +#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \ + sizeof(struct erofs_xattr_entry) + \ + (entry)->e_name_len + le16_to_cpu((entry)->e_value_size)) + +/* have to be aligned with 8 bytes on disk */ +struct erofs_extent_header { + __le32 eh_checksum; + __le32 eh_reserved[3]; +} __packed; + +/* + * Z_EROFS Variable-sized Logical Extent cluster type: + * 0 - literal (uncompressed) cluster + * 1 - compressed cluster (for the head logical cluster) + * 2 - compressed cluster (for the other logical clusters) + * + * In detail, + * 0 - literal (uncompressed) cluster, + * di_advise = 0 + * di_clusterofs = the literal data offset of the cluster + * di_blkaddr = the blkaddr of the literal cluster + * + * 1 - compressed cluster (for the head logical cluster) + * di_advise = 1 + * di_clusterofs = the decompressed data offset of the cluster + * di_blkaddr = the blkaddr of the compressed cluster + * + * 2 - compressed cluster (for the other logical clusters) + * di_advise = 2 + * di_clusterofs = + * the decompressed data offset in its own head cluster + * di_u.delta[0] = distance to its corresponding head cluster + * di_u.delta[1] = distance to its corresponding tail cluster + * (di_advise could be 0, 1 or 2) + */ +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 + +struct z_erofs_vle_decompressed_index { + __le16 di_advise; + /* where to decompress in the head cluster */ + __le16 di_clusterofs; + + union { + /* for the head cluster */ + __le32 blkaddr; + /* + * for the rest clusters + * eg. for 4k page-sized cluster, maximum 4K*64k = 256M) + * [0] - pointing to the head cluster + * [1] - pointing to the tail cluster + */ + __le16 delta[2]; + } di_u __packed; /* 8 bytes */ +} __packed; + +#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \ + sizeof(struct z_erofs_vle_decompressed_index)) + +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* 0, node number */ + __le16 nameoff; /* 8, start offset of file name */ + __u8 file_type; /* 10, file type */ + __u8 reserved; /* 11, reserved */ +} __packed; + +/* file types used in inode_info->flags */ +enum { + EROFS_FT_UNKNOWN, + EROFS_FT_REG_FILE, + EROFS_FT_DIR, + EROFS_FT_CHRDEV, + EROFS_FT_BLKDEV, + EROFS_FT_FIFO, + EROFS_FT_SOCK, + EROFS_FT_SYMLINK, + EROFS_FT_MAX +}; + +#define EROFS_NAME_LEN 255 + +/* check the EROFS on-disk layout strictly at compile time */ +static inline void erofs_check_ondisk_layout_definitions(void) +{ + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32); + BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64); + BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); + BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct erofs_extent_header) != 16); + BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); + BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); +} + +#endif + diff --git a/drivers/staging/erofs/include/linux/tagptr.h b/drivers/staging/erofs/include/linux/tagptr.h new file mode 100644 index 000000000000..ccd106dbd48e --- /dev/null +++ b/drivers/staging/erofs/include/linux/tagptr.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Tagged pointer implementation + * + * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com> + */ +#ifndef 
_LINUX_TAGPTR_H +#define _LINUX_TAGPTR_H + +#include <linux/types.h> +#include <linux/build_bug.h> + +/* + * the name of tagged pointer types are tagptr{1, 2, 3...}_t + * avoid directly using the internal structs __tagptr{1, 2, 3...} + */ +#define __MAKE_TAGPTR(n) \ +typedef struct __tagptr##n { \ + uintptr_t v; \ +} tagptr##n##_t; + +__MAKE_TAGPTR(1) +__MAKE_TAGPTR(2) +__MAKE_TAGPTR(3) +__MAKE_TAGPTR(4) + +#undef __MAKE_TAGPTR + +extern void __compiletime_error("bad tagptr tags") + __bad_tagptr_tags(void); + +extern void __compiletime_error("bad tagptr type") + __bad_tagptr_type(void); + +/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */ +#define __tagptr_mask_1(ptr, n) \ + __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \ + (1UL << (n)) - 1 : + +#define __tagptr_mask(ptr) (\ + __tagptr_mask_1(ptr, 1) ( \ + __tagptr_mask_1(ptr, 2) ( \ + __tagptr_mask_1(ptr, 3) ( \ + __tagptr_mask_1(ptr, 4) ( \ + __bad_tagptr_type(), 0))))) + +/* generate a tagged pointer from a raw value */ +#define tagptr_init(type, val) \ + ((typeof(type)){ .v = (uintptr_t)(val) }) + +/* + * directly cast a tagged pointer to the native pointer type, which + * could be used for backward compatibility of existing code. + */ +#define tagptr_cast_ptr(tptr) ((void *)(tptr).v) + +/* encode tagged pointers */ +#define tagptr_fold(type, ptr, _tags) ({ \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \ + __bad_tagptr_tags(); \ +tagptr_init(type, (uintptr_t)(ptr) | tags); }) + +/* decode tagged pointers */ +#define tagptr_unfold_ptr(tptr) \ + ((void *)((tptr).v & ~__tagptr_mask(tptr))) + +#define tagptr_unfold_tags(tptr) \ + ((tptr).v & __tagptr_mask(tptr)) + +/* operations for the tagger pointer */ +#define tagptr_eq(_tptr1, _tptr2) ({ \ + typeof(_tptr1) tptr1 = (_tptr1); \ + typeof(_tptr2) tptr2 = (_tptr2); \ + (void)(&tptr1 == &tptr2); \ +(tptr1).v == (tptr2).v; }) + +/* lock-free CAS operation */ +#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + typeof(_o) o = (_o); \ + typeof(_n) n = (_n); \ + (void)(&o == &n); \ + (void)(&o == ptptr); \ +tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) + +/* wrap WRITE_ONCE if atomic update is needed */ +#define tagptr_replace_tags(_ptptr, tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \ +*ptptr; }) + +#define tagptr_set_tags(_ptptr, _tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ + __bad_tagptr_tags(); \ + ptptr->v |= tags; \ +*ptptr; }) + +#define tagptr_clear_tags(_ptptr, _tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ + __bad_tagptr_tags(); \ + ptptr->v &= ~tags; \ +*ptptr; }) + +#endif + diff --git a/drivers/staging/erofs/include/trace/events/erofs.h b/drivers/staging/erofs/include/trace/events/erofs.h new file mode 100644 index 000000000000..5aead93a762f --- /dev/null +++ b/drivers/staging/erofs/include/trace/events/erofs.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM erofs + +#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EROFS_H + +#include <linux/tracepoint.h> + +#define show_dev(dev) MAJOR(dev), MINOR(dev) +#define show_dev_nid(entry) show_dev(entry->dev), entry->nid + +#define 
show_file_type(type) \ + __print_symbolic(type, \ + { 0, "FILE" }, \ + { 1, "DIR" }) + +#define show_map_flags(flags) __print_flags(flags, "|", \ + { EROFS_GET_BLOCKS_RAW, "RAW" }) + +#define show_mflags(flags) __print_flags(flags, "", \ + { EROFS_MAP_MAPPED, "M" }, \ + { EROFS_MAP_META, "I" }, \ + { EROFS_MAP_ZIPPED, "Z" }) + +TRACE_EVENT(erofs_lookup, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(const char *, name ) + __field(unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->nid = EROFS_V(dir)->nid; + __entry->name = dentry->d_name.name; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", + show_dev_nid(__entry), + __entry->name, + __entry->flags) +); + +TRACE_EVENT(erofs_fill_inode, + TP_PROTO(struct inode *inode, int isdir), + TP_ARGS(inode, isdir), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(erofs_blk_t, blkaddr ) + __field(unsigned int, ofs ) + __field(int, isdir ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_V(inode)->nid; + __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); + __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); + __entry->isdir = isdir; + ), + + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", + show_dev_nid(__entry), + __entry->blkaddr, __entry->ofs, + __entry->isdir) +); + +TRACE_EVENT(erofs_readpage, + + TP_PROTO(struct page *page, bool raw), + + TP_ARGS(page, raw), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(int, dir ) + __field(pgoff_t, index ) + __field(int, uptodate) + __field(bool, raw ) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->nid = EROFS_V(page->mapping->host)->nid; + __entry->dir = S_ISDIR(page->mapping->host->i_mode); + __entry->index = page->index; + __entry->uptodate = PageUptodate(page); + __entry->raw = raw; + ), + + TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d " + "raw = %d", + show_dev_nid(__entry), + show_file_type(__entry->dir), + (unsigned long)__entry->index, + __entry->uptodate, + __entry->raw) +); + +TRACE_EVENT(erofs_readpages, + + TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage, + bool raw), + + TP_ARGS(inode, page, nrpage, raw), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(pgoff_t, start ) + __field(unsigned int, nrpage ) + __field(bool, raw ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_V(inode)->nid; + __entry->start = page->index; + __entry->nrpage = nrpage; + __entry->raw = raw; + ), + + TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d", + show_dev_nid(__entry), + (unsigned long)__entry->start, + __entry->nrpage, + __entry->raw) +); + +DECLARE_EVENT_CLASS(erofs__map_blocks_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags), + + TP_ARGS(inode, map, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + __field( erofs_off_t, la ) + __field( u64, llen ) + __field( unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_V(inode)->nid; + __entry->la = map->m_la; + __entry->llen = map->m_llen; + __entry->flags = flags; + ), + + 
TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s", + show_dev_nid(__entry), + __entry->la, __entry->llen, show_map_flags(__entry->flags)) +); + +DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned flags), + + TP_ARGS(inode, map, flags) +); + +DECLARE_EVENT_CLASS(erofs__map_blocks_exit, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags, int ret), + + TP_ARGS(inode, map, flags, ret), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + __field( unsigned int, flags ) + __field( erofs_off_t, la ) + __field( erofs_off_t, pa ) + __field( u64, llen ) + __field( u64, plen ) + __field( unsigned int, mflags ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_V(inode)->nid; + __entry->flags = flags; + __entry->la = map->m_la; + __entry->pa = map->m_pa; + __entry->llen = map->m_llen; + __entry->plen = map->m_plen; + __entry->mflags = map->m_flags; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), nid = %llu, flags %s " + "la %llu pa %llu llen %llu plen %llu mflags %s ret %d", + show_dev_nid(__entry), show_map_flags(__entry->flags), + __entry->la, __entry->pa, __entry->llen, __entry->plen, + show_mflags(__entry->mflags), __entry->ret) +); + +DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned flags, int ret), + + TP_ARGS(inode, map, flags, ret) +); + +TRACE_EVENT(erofs_destroy_inode, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_V(inode)->nid; + ), + + TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) +); + +#endif /* _TRACE_EROFS_H */ + + /* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c new file mode 100644 index 000000000000..fbf6ff25cd1b --- /dev/null +++ b/drivers/staging/erofs/inode.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/inode.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. 
+ */ +#include "xattr.h" + +#include <trace/events/erofs.h> + +/* no locking */ +static int read_inode(struct inode *inode, void *data) +{ + struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_inode_v1 *v1 = data; + const unsigned advise = le16_to_cpu(v1->i_advise); + + vi->data_mapping_mode = __inode_data_mapping(advise); + + if (unlikely(vi->data_mapping_mode >= EROFS_INODE_LAYOUT_MAX)) { + errln("unknown data mapping mode %u of nid %llu", + vi->data_mapping_mode, vi->nid); + DBG_BUGON(1); + return -EIO; + } + + if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) { + struct erofs_inode_v2 *v2 = data; + + vi->inode_isize = sizeof(struct erofs_inode_v2); + vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount); + + inode->i_mode = le16_to_cpu(v2->i_mode); + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) { + vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr); + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + inode->i_rdev = + new_decode_dev(le32_to_cpu(v2->i_u.rdev)); + } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_rdev = 0; + } else { + return -EIO; + } + + i_uid_write(inode, le32_to_cpu(v2->i_uid)); + i_gid_write(inode, le32_to_cpu(v2->i_gid)); + set_nlink(inode, le32_to_cpu(v2->i_nlink)); + + /* ns timestamp */ + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = + le64_to_cpu(v2->i_ctime); + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = + le32_to_cpu(v2->i_ctime_nsec); + + inode->i_size = le64_to_cpu(v2->i_size); + } else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) { + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); + + vi->inode_isize = sizeof(struct erofs_inode_v1); + vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount); + + inode->i_mode = le16_to_cpu(v1->i_mode); + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) { + vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr); + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + inode->i_rdev = + new_decode_dev(le32_to_cpu(v1->i_u.rdev)); + } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_rdev = 0; + } else { + return -EIO; + } + + i_uid_write(inode, le16_to_cpu(v1->i_uid)); + i_gid_write(inode, le16_to_cpu(v1->i_gid)); + set_nlink(inode, le16_to_cpu(v1->i_nlink)); + + /* use build time to derive all file time */ + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = + sbi->build_time; + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = + sbi->build_time_nsec; + + inode->i_size = le32_to_cpu(v1->i_size); + } else { + errln("unsupported on-disk inode version %u of nid %llu", + __inode_version(advise), vi->nid); + DBG_BUGON(1); + return -EIO; + } + + /* measure inode.i_blocks as the generic filesystem */ + inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; + return 0; +} + +/* + * try_lock can be required since locking order is: + * file data(fs_inode) + * meta(bd_inode) + * but the majority of the callers is "iget", + * in that case we are pretty sure no deadlock since + * no data operations exist. However I tend to + * try_lock since it takes no much overhead and + * will success immediately. 
+ */ +static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs) +{ + struct erofs_vnode *vi = EROFS_V(inode); + struct erofs_sb_info *sbi = EROFS_I_SB(inode); + int mode = vi->data_mapping_mode; + + DBG_BUGON(mode >= EROFS_INODE_LAYOUT_MAX); + + /* should be inode inline C */ + if (mode != EROFS_INODE_LAYOUT_INLINE) + return 0; + + /* fast symlink (following ext4) */ + if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) { + char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL); + + if (unlikely(lnk == NULL)) + return -ENOMEM; + + m_pofs += vi->inode_isize + vi->xattr_isize; + BUG_ON(m_pofs + inode->i_size > PAGE_SIZE); + + /* get in-page inline data */ + memcpy(lnk, data + m_pofs, inode->i_size); + lnk[inode->i_size] = '\0'; + + inode->i_link = lnk; + set_inode_fast_symlink(inode); + } + return -EAGAIN; +} + +static int fill_inode(struct inode *inode, int isdir) +{ + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); + struct erofs_vnode *vi = EROFS_V(inode); + struct page *page; + void *data; + int err; + erofs_blk_t blkaddr; + unsigned ofs; + + trace_erofs_fill_inode(inode, isdir); + + blkaddr = erofs_blknr(iloc(sbi, vi->nid)); + ofs = erofs_blkoff(iloc(sbi, vi->nid)); + + debugln("%s, reading inode nid %llu at %u of blkaddr %u", + __func__, vi->nid, ofs, blkaddr); + + page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir); + + if (IS_ERR(page)) { + errln("failed to get inode (nid: %llu) page, err %ld", + vi->nid, PTR_ERR(page)); + return PTR_ERR(page); + } + + BUG_ON(!PageUptodate(page)); + data = page_address(page); + + err = read_inode(inode, data + ofs); + if (!err) { + /* setup the new inode */ + if (S_ISREG(inode->i_mode)) { +#ifdef CONFIG_EROFS_FS_XATTR + if (vi->xattr_isize) + inode->i_op = &erofs_generic_xattr_iops; +#endif + inode->i_fop = &generic_ro_fops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = +#ifdef CONFIG_EROFS_FS_XATTR + vi->xattr_isize ? 
&erofs_dir_xattr_iops : +#endif + &erofs_dir_iops; + inode->i_fop = &erofs_dir_fops; + } else if (S_ISLNK(inode->i_mode)) { + /* by default, page_get_link is used for symlink */ + inode->i_op = +#ifdef CONFIG_EROFS_FS_XATTR + &erofs_symlink_xattr_iops, +#else + &page_symlink_inode_operations; +#endif + inode_nohighmem(inode); + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { +#ifdef CONFIG_EROFS_FS_XATTR + inode->i_op = &erofs_special_inode_operations; +#endif + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + err = -EIO; + goto out_unlock; + } + + if (is_inode_layout_compression(inode)) { +#ifdef CONFIG_EROFS_FS_ZIP + inode->i_mapping->a_ops = + &z_erofs_vle_normalaccess_aops; +#else + err = -ENOTSUPP; +#endif + goto out_unlock; + } + + inode->i_mapping->a_ops = &erofs_raw_access_aops; + + /* fill last page if inline data is available */ + fill_inline_data(inode, data, ofs); + } + +out_unlock: + unlock_page(page); + put_page(page); + return err; +} + +struct inode *erofs_iget(struct super_block *sb, + erofs_nid_t nid, bool isdir) +{ + struct inode *inode = iget_locked(sb, nid); + + if (unlikely(inode == NULL)) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) { + int err; + struct erofs_vnode *vi = EROFS_V(inode); + vi->nid = nid; + + err = fill_inode(inode, isdir); + if (likely(!err)) + unlock_new_inode(inode); + else { + iget_failed(inode); + inode = ERR_PTR(err); + } + } + return inode; +} + +#ifdef CONFIG_EROFS_FS_XATTR +const struct inode_operations erofs_generic_xattr_iops = { + .listxattr = erofs_listxattr, +}; +#endif + +#ifdef CONFIG_EROFS_FS_XATTR +const struct inode_operations erofs_symlink_xattr_iops = { + .get_link = page_get_link, + .listxattr = erofs_listxattr, +}; +#endif + +const struct inode_operations erofs_special_inode_operations = { +#ifdef CONFIG_EROFS_FS_XATTR + .listxattr = erofs_listxattr, +#endif +}; + +#ifdef CONFIG_EROFS_FS_XATTR +const struct inode_operations erofs_fast_symlink_xattr_iops = { + .get_link = simple_get_link, + .listxattr = erofs_listxattr, +}; +#endif + diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h new file mode 100644 index 000000000000..367b39fe46e5 --- /dev/null +++ b/drivers/staging/erofs/internal.h @@ -0,0 +1,556 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * linux/drivers/staging/erofs/internal.h + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#ifndef __INTERNAL_H +#define __INTERNAL_H + +#include <linux/fs.h> +#include <linux/dcache.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/bio.h> +#include <linux/buffer_head.h> +#include <linux/cleancache.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include "erofs_fs.h" + +/* redefine pr_fmt "erofs: " */ +#undef pr_fmt +#define pr_fmt(fmt) "erofs: " fmt + +#define errln(x, ...) pr_err(x "\n", ##__VA_ARGS__) +#define infoln(x, ...) pr_info(x "\n", ##__VA_ARGS__) +#ifdef CONFIG_EROFS_FS_DEBUG +#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__) + +#define dbg_might_sleep might_sleep +#define DBG_BUGON BUG_ON +#else +#define debugln(x, ...) ((void)0) + +#define dbg_might_sleep() ((void)0) +#define DBG_BUGON(...) 
((void)0) +#endif + +#ifdef CONFIG_EROFS_FAULT_INJECTION +enum { + FAULT_KMALLOC, + FAULT_MAX, +}; + +extern char *erofs_fault_name[FAULT_MAX]; +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) + +struct erofs_fault_info { + atomic_t inject_ops; + unsigned int inject_rate; + unsigned int inject_type; +}; +#endif + +#ifdef CONFIG_EROFS_FS_ZIP_CACHE_BIPOLAR +#define EROFS_FS_ZIP_CACHE_LVL (2) +#elif defined(EROFS_FS_ZIP_CACHE_UNIPOLAR) +#define EROFS_FS_ZIP_CACHE_LVL (1) +#else +#define EROFS_FS_ZIP_CACHE_LVL (0) +#endif + +#if (!defined(EROFS_FS_HAS_MANAGED_CACHE) && (EROFS_FS_ZIP_CACHE_LVL > 0)) +#define EROFS_FS_HAS_MANAGED_CACHE +#endif + +/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */ +#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1 + +typedef u64 erofs_nid_t; + +struct erofs_sb_info { + /* list for all registered superblocks, mainly for shrinker */ + struct list_head list; + struct mutex umount_mutex; + + u32 blocks; + u32 meta_blkaddr; +#ifdef CONFIG_EROFS_FS_XATTR + u32 xattr_blkaddr; +#endif + + /* inode slot unit size in bit shift */ + unsigned char islotbits; +#ifdef CONFIG_EROFS_FS_ZIP + /* cluster size in bit shift */ + unsigned char clusterbits; + + /* the dedicated workstation for compression */ + struct radix_tree_root workstn_tree; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + struct inode *managed_cache; +#endif + +#endif + + u32 build_time_nsec; + u64 build_time; + + /* what we really care is nid, rather than ino.. */ + erofs_nid_t root_nid; + /* used for statfs, f_files - f_favail */ + u64 inos; + + u8 uuid[16]; /* 128-bit uuid for volume */ + u8 volume_name[16]; /* volume name */ + char *dev_name; + + unsigned int mount_opt; + unsigned int shrinker_run_no; + +#ifdef CONFIG_EROFS_FAULT_INJECTION + struct erofs_fault_info fault_info; /* For fault injection */ +#endif +}; + +#ifdef CONFIG_EROFS_FAULT_INJECTION +#define erofs_show_injection_info(type) \ + infoln("inject %s in %s of %pS", erofs_fault_name[type], \ + __func__, __builtin_return_address(0)) + +static inline bool time_to_inject(struct erofs_sb_info *sbi, int type) +{ + struct erofs_fault_info *ffi = &sbi->fault_info; + + if (!ffi->inject_rate) + return false; + + if (!IS_FAULT_SET(ffi, type)) + return false; + + atomic_inc(&ffi->inject_ops); + if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { + atomic_set(&ffi->inject_ops, 0); + return true; + } + return false; +} +#endif + +static inline void *erofs_kmalloc(struct erofs_sb_info *sbi, + size_t size, gfp_t flags) +{ +#ifdef CONFIG_EROFS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_KMALLOC)) { + erofs_show_injection_info(FAULT_KMALLOC); + return NULL; + } +#endif + return kmalloc(size, flags); +} + +#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) +#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info) + +/* Mount flags set via mount options or defaults */ +#define EROFS_MOUNT_XATTR_USER 0x00000010 +#define EROFS_MOUNT_POSIX_ACL 0x00000020 +#define EROFS_MOUNT_FAULT_INJECTION 0x00000040 + +#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option) +#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option) +#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option) + +#ifdef CONFIG_EROFS_FS_ZIP +#define erofs_workstn_lock(sbi) xa_lock(&(sbi)->workstn_tree) +#define erofs_workstn_unlock(sbi) xa_unlock(&(sbi)->workstn_tree) + +/* basic unit of the workstation of a super_block */ +struct erofs_workgroup { + /* the workgroup index in the workstation */ + 
pgoff_t index; + + /* overall workgroup reference count */ + atomic_t refcount; +}; + +#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) + +static inline bool erofs_workgroup_try_to_freeze( + struct erofs_workgroup *grp, int v) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + if (v != atomic_cmpxchg(&grp->refcount, + v, EROFS_LOCKED_MAGIC)) + return false; + preempt_disable(); +#else + preempt_disable(); + if (atomic_read(&grp->refcount) != v) { + preempt_enable(); + return false; + } +#endif + return true; +} + +static inline void erofs_workgroup_unfreeze( + struct erofs_workgroup *grp, int v) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + atomic_set(&grp->refcount, v); +#endif + preempt_enable(); +} + +static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt) +{ + const int locked = (int)EROFS_LOCKED_MAGIC; + int o; + +repeat: + o = atomic_read(&grp->refcount); + + /* spin if it is temporarily locked at the reclaim path */ + if (unlikely(o == locked)) { +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + do + cpu_relax(); + while (atomic_read(&grp->refcount) == locked); +#endif + goto repeat; + } + + if (unlikely(o <= 0)) + return -1; + + if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o)) + goto repeat; + + *ocnt = o; + return 0; +} + +#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) + +extern int erofs_workgroup_put(struct erofs_workgroup *grp); + +extern struct erofs_workgroup *erofs_find_workgroup( + struct super_block *sb, pgoff_t index, bool *tag); + +extern int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp, bool tag); + +extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, + unsigned long nr_shrink, bool cleanup); + +static inline void erofs_workstation_cleanup_all(struct super_block *sb) +{ + erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true); +} + +#ifdef EROFS_FS_HAS_MANAGED_CACHE +#define EROFS_UNALLOCATED_CACHED_PAGE ((void *)0x5F0EF00D) + +extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); +extern int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page); +#endif + +#endif + +/* we strictly follow PAGE_SIZE and no buffer head yet */ +#define LOG_BLOCK_SIZE PAGE_SHIFT + +#undef LOG_SECTORS_PER_BLOCK +#define LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) + +#undef SECTORS_PER_BLOCK +#define SECTORS_PER_BLOCK (1 << SECTORS_PER_BLOCK) + +#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE) + +#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ) +#error erofs cannot be used in this platform +#endif + +#define ROOT_NID(sb) ((sb)->root_nid) + +#ifdef CONFIG_EROFS_FS_ZIP +/* hard limit of pages per compressed cluster */ +#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) + +/* page count of a compressed cluster */ +#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE) +#endif + +typedef u64 erofs_off_t; + +/* data type for filesystem-wide blocks number */ +typedef u32 erofs_blk_t; + +#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) +#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) +#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) + +static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) +{ + return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); +} + +#define inode_set_inited_xattr(inode) (EROFS_V(inode)->flags |= 1) +#define inode_has_inited_xattr(inode) (EROFS_V(inode)->flags & 1) + +struct erofs_vnode { 
+ erofs_nid_t nid; + unsigned int flags; + + unsigned char data_mapping_mode; + /* inline size in bytes */ + unsigned char inode_isize; + unsigned short xattr_isize; + + unsigned xattr_shared_count; + unsigned *xattr_shared_xattrs; + + erofs_blk_t raw_blkaddr; + + /* the corresponding vfs inode */ + struct inode vfs_inode; +}; + +#define EROFS_V(ptr) \ + container_of(ptr, struct erofs_vnode, vfs_inode) + +#define __inode_advise(x, bit, bits) \ + (((x) >> (bit)) & ((1 << (bits)) - 1)) + +#define __inode_version(advise) \ + __inode_advise(advise, EROFS_I_VERSION_BIT, \ + EROFS_I_VERSION_BITS) + +#define __inode_data_mapping(advise) \ + __inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\ + EROFS_I_DATA_MAPPING_BITS) + +static inline unsigned long inode_datablocks(struct inode *inode) +{ + /* since i_size cannot be changed */ + return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); +} + +static inline bool is_inode_layout_plain(struct inode *inode) +{ + return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_PLAIN; +} + +static inline bool is_inode_layout_compression(struct inode *inode) +{ + return EROFS_V(inode)->data_mapping_mode == + EROFS_INODE_LAYOUT_COMPRESSION; +} + +static inline bool is_inode_layout_inline(struct inode *inode) +{ + return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_INLINE; +} + +extern const struct super_operations erofs_sops; +extern const struct inode_operations erofs_dir_iops; +extern const struct file_operations erofs_dir_fops; + +extern const struct address_space_operations erofs_raw_access_aops; +#ifdef CONFIG_EROFS_FS_ZIP +extern const struct address_space_operations z_erofs_vle_normalaccess_aops; +#endif + +/* + * Logical to physical block mapping, used by erofs_map_blocks() + * + * Different with other file systems, it is used for 2 access modes: + * + * 1) RAW access mode: + * + * Users pass a valid (m_lblk, m_lofs -- usually 0) pair, + * and get the valid m_pblk, m_pofs and the longest m_len(in bytes). + * + * Note that m_lblk in the RAW access mode refers to the number of + * the compressed ondisk block rather than the uncompressed + * in-memory block for the compressed file. + * + * m_pofs equals to m_lofs except for the inline data page. + * + * 2) Normal access mode: + * + * If the inode is not compressed, it has no difference with + * the RAW access mode. However, if the inode is compressed, + * users should pass a valid (m_lblk, m_lofs) pair, and get + * the needed m_pblk, m_pofs, m_len to get the compressed data + * and the updated m_lblk, m_lofs which indicates the start + * of the corresponding uncompressed data in the file. 
+ */ +enum { + BH_Zipped = BH_PrivateStart, +}; + +/* Has a disk mapping */ +#define EROFS_MAP_MAPPED (1 << BH_Mapped) +/* Located in metadata (could be copied from bd_inode) */ +#define EROFS_MAP_META (1 << BH_Meta) +/* The extent has been compressed */ +#define EROFS_MAP_ZIPPED (1 << BH_Zipped) + +struct erofs_map_blocks { + erofs_off_t m_pa, m_la; + u64 m_plen, m_llen; + + unsigned int m_flags; +}; + +/* Flags used by erofs_map_blocks() */ +#define EROFS_GET_BLOCKS_RAW 0x0001 + +/* data.c */ +static inline struct bio *prepare_bio( + struct super_block *sb, + erofs_blk_t blkaddr, unsigned nr_pages, + bio_end_io_t endio) +{ + gfp_t gfp = GFP_NOIO; + struct bio *bio = bio_alloc(gfp, nr_pages); + + if (unlikely(bio == NULL) && + (current->flags & PF_MEMALLOC)) { + do { + nr_pages /= 2; + if (unlikely(!nr_pages)) { + bio = bio_alloc(gfp | __GFP_NOFAIL, 1); + BUG_ON(bio == NULL); + break; + } + bio = bio_alloc(gfp, nr_pages); + } while (bio == NULL); + } + + bio->bi_end_io = endio; + bio_set_dev(bio, sb->s_bdev); + bio->bi_iter.bi_sector = blkaddr << LOG_SECTORS_PER_BLOCK; + return bio; +} + +static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags) +{ + bio_set_op_attrs(bio, op, op_flags); + submit_bio(bio); +} + +extern struct page *erofs_get_meta_page(struct super_block *sb, + erofs_blk_t blkaddr, bool prio); +extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); +extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *, + struct page **, int); + +struct erofs_map_blocks_iter { + struct erofs_map_blocks map; + struct page *mpage; +}; + + +static inline struct page *erofs_get_inline_page(struct inode *inode, + erofs_blk_t blkaddr) +{ + return erofs_get_meta_page(inode->i_sb, + blkaddr, S_ISDIR(inode->i_mode)); +} + +/* inode.c */ +extern struct inode *erofs_iget(struct super_block *sb, + erofs_nid_t nid, bool dir); + +/* dir.c */ +int erofs_namei(struct inode *dir, struct qstr *name, + erofs_nid_t *nid, unsigned *d_type); + +/* xattr.c */ +#ifdef CONFIG_EROFS_FS_XATTR +extern const struct xattr_handler *erofs_xattr_handlers[]; +#endif + +/* symlink */ +#ifdef CONFIG_EROFS_FS_XATTR +extern const struct inode_operations erofs_symlink_xattr_iops; +extern const struct inode_operations erofs_fast_symlink_xattr_iops; +extern const struct inode_operations erofs_special_inode_operations; +#endif + +static inline void set_inode_fast_symlink(struct inode *inode) +{ +#ifdef CONFIG_EROFS_FS_XATTR + inode->i_op = &erofs_fast_symlink_xattr_iops; +#else + inode->i_op = &simple_symlink_inode_operations; +#endif +} + +static inline bool is_inode_fast_symlink(struct inode *inode) +{ +#ifdef CONFIG_EROFS_FS_XATTR + return inode->i_op == &erofs_fast_symlink_xattr_iops; +#else + return inode->i_op == &simple_symlink_inode_operations; +#endif +} + +static inline void *erofs_vmap(struct page **pages, unsigned int count) +{ +#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM + int i = 0; + + while (1) { + void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL); + /* retry two more times (totally 3 times) */ + if (addr != NULL || ++i >= 3) + return addr; + vm_unmap_aliases(); + } + return NULL; +#else + return vmap(pages, count, VM_MAP, PAGE_KERNEL); +#endif +} + +static inline void erofs_vunmap(const void *mem, unsigned int count) +{ +#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM + vm_unmap_ram(mem, count); +#else + vunmap(mem); +#endif +} + +/* utils.c */ +extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); + +extern void erofs_register_super(struct 
super_block *sb); +extern void erofs_unregister_super(struct super_block *sb); + +extern unsigned long erofs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long erofs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); + +#ifndef lru_to_page +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#endif + +#endif + diff --git a/drivers/staging/erofs/lz4defs.h b/drivers/staging/erofs/lz4defs.h new file mode 100644 index 000000000000..00a0b58a0871 --- /dev/null +++ b/drivers/staging/erofs/lz4defs.h @@ -0,0 +1,227 @@ +#ifndef __LZ4DEFS_H__ +#define __LZ4DEFS_H__ + +/* + * lz4defs.h -- common and architecture specific defines for the kernel usage + + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 + * + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> + */ + +#include <asm/unaligned.h> +#include <linux/string.h> /* memset, memcpy */ + +#define FORCE_INLINE __always_inline + +/*-************************************ + * Basic Types + **************************************/ +#include <linux/types.h> + +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef uintptr_t uptrval; + +/*-************************************ + * Architecture specifics + **************************************/ +#if defined(CONFIG_64BIT) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +#if defined(__LITTLE_ENDIAN) +#define LZ4_LITTLE_ENDIAN 1 +#else +#define LZ4_LITTLE_ENDIAN 0 +#endif + +/*-************************************ + * Constants + **************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (WILDCOPYLENGTH + MINMATCH) + +/* Increase this value ==> compression run slower on incompressible data */ +#define LZ4_SKIPTRIGGER 6 + +#define HASH_UNIT sizeof(size_t) + +#define KB (1 << 10) +#define MB (1 << 20) +#define GB (1U << 30) + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) +#define STEPSIZE sizeof(size_t) + +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) + +/*-************************************ + * Reading and writing into memory + **************************************/ +static FORCE_INLINE U16 LZ4_read16(const void *ptr) +{ + return get_unaligned((const U16 *)ptr); +} + +static FORCE_INLINE U32 LZ4_read32(const void *ptr) +{ + return get_unaligned((const U32 *)ptr); +} + +static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) +{ + return get_unaligned((const size_t *)ptr); +} + +static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) +{ + put_unaligned(value, (U16 *)memPtr); +} + +static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) +{ + put_unaligned(value, (U32 *)memPtr); +} + +static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) +{ + return get_unaligned_le16(memPtr); +} + +static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) +{ + return put_unaligned_le16(value, memPtr); +} + +static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) +{ +#if LZ4_ARCH64 + U64 a = get_unaligned((const U64 *)src); + + put_unaligned(a, (U64 *)dst); +#else + U32 a = get_unaligned((const U32 *)src); + U32 b = get_unaligned((const U32 *)src + 1); + + put_unaligned(a, (U32 *)dst); + put_unaligned(b, (U32 *)dst + 1); +#endif +} + +/* + * customized variant of memcpy, + * which can overwrite up to 7 bytes beyond dstEnd + */ +static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, + const void *srcPtr, void *dstEnd) +{ + BYTE *d = (BYTE *)dstPtr; + const BYTE *s = (const BYTE *)srcPtr; + BYTE *const e = (BYTE *)dstEnd; + + do { + LZ4_copy8(d, s); + d += 8; + s += 8; + } while (d < e); +} + +static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) +{ +#if LZ4_LITTLE_ENDIAN + return __ffs(val) >> 3; +#else + return (BITS_PER_LONG - 1 - __fls(val)) >> 3; +#endif +} + +static FORCE_INLINE unsigned int LZ4_count( + const BYTE *pIn, + const BYTE *pMatch, + const BYTE *pInLimit) +{ + const BYTE *const pStart = pIn; + + while (likely(pIn < pInLimit - (STEPSIZE - 1))) { 
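+		/*
+		 * Compare one machine word (STEPSIZE bytes) per iteration:
+		 * the first non-zero XOR below tells, via LZ4_NbCommonBytes(),
+		 * how many leading bytes of that word still match.
+		 */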
+ size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + + if (!diff) { + pIn += STEPSIZE; + pMatch += STEPSIZE; + continue; + } + + pIn += LZ4_NbCommonBytes(diff); + + return (unsigned int)(pIn - pStart); + } + +#if LZ4_ARCH64 + if ((pIn < (pInLimit - 3)) + && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { + pIn += 4; + pMatch += 4; + } +#endif + + if ((pIn < (pInLimit - 1)) + && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { + pIn += 2; + pMatch += 2; + } + + if ((pIn < pInLimit) && (*pMatch == *pIn)) + pIn++; + + return (unsigned int)(pIn - pStart); +} + +typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; + +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; + +#endif diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c new file mode 100644 index 000000000000..546a47156101 --- /dev/null +++ b/drivers/staging/erofs/namei.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/namei.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include "internal.h" +#include "xattr.h" + +#include <trace/events/erofs.h> + +/* based on the value of qn->len is accurate */ +static inline int dirnamecmp(struct qstr *qn, + struct qstr *qd, unsigned *matched) +{ + unsigned i = *matched, len = min(qn->len, qd->len); +loop: + if (unlikely(i >= len)) { + *matched = i; + if (qn->len < qd->len) { + /* + * actually (qn->len == qd->len) + * when qd->name[i] == '\0' + */ + return qd->name[i] == '\0' ? 0 : -1; + } + return (qn->len > qd->len); + } + + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; + } + + ++i; + goto loop; +} + +static struct erofs_dirent *find_target_dirent( + struct qstr *name, + u8 *data, int maxsize) +{ + unsigned ndirents, head, back; + unsigned startprfx, endprfx; + struct erofs_dirent *const de = (struct erofs_dirent *)data; + + /* make sure that maxsize is valid */ + BUG_ON(maxsize < sizeof(struct erofs_dirent)); + + ndirents = le16_to_cpu(de->nameoff) / sizeof(*de); + + /* corrupted dir (may be unnecessary...) */ + BUG_ON(!ndirents); + + head = 0; + back = ndirents - 1; + startprfx = endprfx = 0; + + while (head <= back) { + unsigned mid = head + (back - head) / 2; + unsigned nameoff = le16_to_cpu(de[mid].nameoff); + unsigned matched = min(startprfx, endprfx); + + struct qstr dname = QSTR_INIT(data + nameoff, + unlikely(mid >= ndirents - 1) ? 
+ maxsize - nameoff : + le16_to_cpu(de[mid + 1].nameoff) - nameoff); + + /* string comparison without already matched prefix */ + int ret = dirnamecmp(name, &dname, &matched); + + if (unlikely(!ret)) + return de + mid; + else if (ret > 0) { + head = mid + 1; + startprfx = matched; + } else if (unlikely(mid < 1)) /* fix "mid" overflow */ + break; + else { + back = mid - 1; + endprfx = matched; + } + } + + return ERR_PTR(-ENOENT); +} + +static struct page *find_target_block_classic( + struct inode *dir, + struct qstr *name, int *_diff) +{ + unsigned startprfx, endprfx; + unsigned head, back; + struct address_space *const mapping = dir->i_mapping; + struct page *candidate = ERR_PTR(-ENOENT); + + startprfx = endprfx = 0; + head = 0; + back = inode_datablocks(dir) - 1; + + while (head <= back) { + unsigned mid = head + (back - head) / 2; + struct page *page = read_mapping_page(mapping, mid, NULL); + + if (IS_ERR(page)) { +exact_out: + if (!IS_ERR(candidate)) /* valid candidate */ + put_page(candidate); + return page; + } else { + int diff; + unsigned ndirents, matched; + struct qstr dname; + struct erofs_dirent *de = kmap_atomic(page); + unsigned nameoff = le16_to_cpu(de->nameoff); + + ndirents = nameoff / sizeof(*de); + + /* corrupted dir (should have one entry at least) */ + BUG_ON(!ndirents || nameoff > PAGE_SIZE); + + matched = min(startprfx, endprfx); + + dname.name = (u8 *)de + nameoff; + dname.len = ndirents == 1 ? + /* since the rest of the last page is 0 */ + EROFS_BLKSIZ - nameoff + : le16_to_cpu(de[1].nameoff) - nameoff; + + /* string comparison without already matched prefix */ + diff = dirnamecmp(name, &dname, &matched); + kunmap_atomic(de); + + if (unlikely(!diff)) { + *_diff = 0; + goto exact_out; + } else if (diff > 0) { + head = mid + 1; + startprfx = matched; + + if (likely(!IS_ERR(candidate))) + put_page(candidate); + candidate = page; + } else { + put_page(page); + + if (unlikely(mid < 1)) /* fix "mid" overflow */ + break; + + back = mid - 1; + endprfx = matched; + } + } + } + *_diff = 1; + return candidate; +} + +int erofs_namei(struct inode *dir, + struct qstr *name, + erofs_nid_t *nid, unsigned *d_type) +{ + int diff; + struct page *page; + u8 *data; + struct erofs_dirent *de; + + if (unlikely(!dir->i_size)) + return -ENOENT; + + diff = 1; + page = find_target_block_classic(dir, name, &diff); + + if (unlikely(IS_ERR(page))) + return PTR_ERR(page); + + data = kmap_atomic(page); + /* the target page has been mapped */ + de = likely(diff) ? 
+ /* since the rest of the last page is 0 */ + find_target_dirent(name, data, EROFS_BLKSIZ) : + (struct erofs_dirent *)data; + + if (likely(!IS_ERR(de))) { + *nid = le64_to_cpu(de->nid); + *d_type = de->file_type; + } + + kunmap_atomic(data); + put_page(page); + + return PTR_ERR_OR_ZERO(de); +} + +/* NOTE: i_mutex is already held by vfs */ +static struct dentry *erofs_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) +{ + int err; + erofs_nid_t nid; + unsigned d_type; + struct inode *inode; + + DBG_BUGON(!d_really_is_negative(dentry)); + /* dentry must be unhashed in lookup, no need to worry about */ + DBG_BUGON(!d_unhashed(dentry)); + + trace_erofs_lookup(dir, dentry, flags); + + /* file name exceeds fs limit */ + if (unlikely(dentry->d_name.len > EROFS_NAME_LEN)) + return ERR_PTR(-ENAMETOOLONG); + + /* false uninitialized warnings on gcc 4.8.x */ + err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); + + if (err == -ENOENT) { + /* negative dentry */ + inode = NULL; + goto negative_out; + } else if (unlikely(err)) + return ERR_PTR(err); + + debugln("%s, %s (nid %llu) found, d_type %u", __func__, + dentry->d_name.name, nid, d_type); + + inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR); + if (IS_ERR(inode)) + return ERR_CAST(inode); + +negative_out: + return d_splice_alias(inode, dentry); +} + +const struct inode_operations erofs_dir_iops = { + .lookup = erofs_lookup, +}; + +const struct inode_operations erofs_dir_xattr_iops = { + .lookup = erofs_lookup, +#ifdef CONFIG_EROFS_FS_XATTR + .listxattr = erofs_listxattr, +#endif +}; + diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c new file mode 100644 index 000000000000..1aec509c805f --- /dev/null +++ b/drivers/staging/erofs/super.c @@ -0,0 +1,649 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/super.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include <linux/module.h> +#include <linux/buffer_head.h> +#include <linux/statfs.h> +#include <linux/parser.h> +#include <linux/seq_file.h> +#include "internal.h" + +#define CREATE_TRACE_POINTS +#include <trace/events/erofs.h> + +static struct kmem_cache *erofs_inode_cachep __read_mostly; + +static void init_once(void *ptr) +{ + struct erofs_vnode *vi = ptr; + + inode_init_once(&vi->vfs_inode); +} + +static int erofs_init_inode_cache(void) +{ + erofs_inode_cachep = kmem_cache_create("erofs_inode", + sizeof(struct erofs_vnode), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + + return erofs_inode_cachep != NULL ? 0 : -ENOMEM; +} + +static void erofs_exit_inode_cache(void) +{ + BUG_ON(erofs_inode_cachep == NULL); + kmem_cache_destroy(erofs_inode_cachep); +} + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct erofs_vnode *vi = + kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL); + + if (vi == NULL) + return NULL; + + /* zero out everything except vfs_inode */ + memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode)); + return &vi->vfs_inode; +} + +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct erofs_vnode *vi = EROFS_V(inode); + + /* be careful RCU symlink path (see ext4_inode_info->i_data)! 
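+	 * i_link of a fast symlink may still be dereferenced by the RCU
+	 * path walk, so it can only be freed here, after the grace period.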
*/ + if (is_inode_fast_symlink(inode)) + kfree(inode->i_link); + + kfree(vi->xattr_shared_xattrs); + + kmem_cache_free(erofs_inode_cachep, vi); +} + +static void destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, i_callback); +} + +static int superblock_read(struct super_block *sb) +{ + struct erofs_sb_info *sbi; + struct buffer_head *bh; + struct erofs_super_block *layout; + unsigned blkszbits; + int ret; + + bh = sb_bread(sb, 0); + + if (bh == NULL) { + errln("cannot read erofs superblock"); + return -EIO; + } + + sbi = EROFS_SB(sb); + layout = (struct erofs_super_block *)((u8 *)bh->b_data + + EROFS_SUPER_OFFSET); + + ret = -EINVAL; + if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) { + errln("cannot find valid erofs superblock"); + goto out; + } + + blkszbits = layout->blkszbits; + /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ + if (unlikely(blkszbits != LOG_BLOCK_SIZE)) { + errln("blksize %u isn't supported on this platform", + 1 << blkszbits); + goto out; + } + + sbi->blocks = le32_to_cpu(layout->blocks); + sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr); +#ifdef CONFIG_EROFS_FS_XATTR + sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr); +#endif + sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1; +#ifdef CONFIG_EROFS_FS_ZIP + sbi->clusterbits = 12; + + if (1 << (sbi->clusterbits - 12) > Z_EROFS_CLUSTER_MAX_PAGES) + errln("clusterbits %u is not supported on this kernel", + sbi->clusterbits); +#endif + + sbi->root_nid = le16_to_cpu(layout->root_nid); + sbi->inos = le64_to_cpu(layout->inos); + + sbi->build_time = le64_to_cpu(layout->build_time); + sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec); + + memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid)); + memcpy(sbi->volume_name, layout->volume_name, + sizeof(layout->volume_name)); + + ret = 0; +out: + brelse(bh); + return ret; +} + +#ifdef CONFIG_EROFS_FAULT_INJECTION +char *erofs_fault_name[FAULT_MAX] = { + [FAULT_KMALLOC] = "kmalloc", +}; + +static void erofs_build_fault_attr(struct erofs_sb_info *sbi, + unsigned int rate) +{ + struct erofs_fault_info *ffi = &sbi->fault_info; + + if (rate) { + atomic_set(&ffi->inject_ops, 0); + ffi->inject_rate = rate; + ffi->inject_type = (1 << FAULT_MAX) - 1; + } else { + memset(ffi, 0, sizeof(struct erofs_fault_info)); + } +} +#endif + +static void default_options(struct erofs_sb_info *sbi) +{ +#ifdef CONFIG_EROFS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif + +#ifdef CONFIG_EROFS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif +} + +enum { + Opt_user_xattr, + Opt_nouser_xattr, + Opt_acl, + Opt_noacl, + Opt_fault_injection, + Opt_err +}; + +static match_table_t erofs_tokens = { + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_fault_injection, "fault_injection=%u"}, + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options) +{ + substring_t args[MAX_OPT_ARGS]; + char *p; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + args[0].to = args[0].from = NULL; + token = match_token(p, erofs_tokens, args); + + switch (token) { +#ifdef CONFIG_EROFS_FS_XATTR + case Opt_user_xattr: + set_opt(EROFS_SB(sb), XATTR_USER); + break; + case Opt_nouser_xattr: + clear_opt(EROFS_SB(sb), XATTR_USER); + break; +#else + case Opt_user_xattr: + infoln("user_xattr options not supported"); + break; + case Opt_nouser_xattr: + infoln("nouser_xattr options not supported"); + 
break; +#endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL + case Opt_acl: + set_opt(EROFS_SB(sb), POSIX_ACL); + break; + case Opt_noacl: + clear_opt(EROFS_SB(sb), POSIX_ACL); + break; +#else + case Opt_acl: + infoln("acl options not supported"); + break; + case Opt_noacl: + infoln("noacl options not supported"); + break; +#endif + case Opt_fault_injection: + if (args->from && match_int(args, &arg)) + return -EINVAL; +#ifdef CONFIG_EROFS_FAULT_INJECTION + erofs_build_fault_attr(EROFS_SB(sb), arg); + set_opt(EROFS_SB(sb), FAULT_INJECTION); +#else + infoln("FAULT_INJECTION was not selected"); +#endif + break; + default: + errln("Unrecognized mount option \"%s\" " + "or missing value", p); + return -EINVAL; + } + } + return 0; +} + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + +static const struct address_space_operations managed_cache_aops; + +static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask) +{ + int ret = 1; /* 0 - busy */ + struct address_space *const mapping = page->mapping; + + BUG_ON(!PageLocked(page)); + BUG_ON(mapping->a_ops != &managed_cache_aops); + + if (PagePrivate(page)) + ret = erofs_try_to_free_cached_page(mapping, page); + + return ret; +} + +static void managed_cache_invalidatepage(struct page *page, + unsigned int offset, unsigned int length) +{ + const unsigned int stop = length + offset; + + BUG_ON(!PageLocked(page)); + + /* Check for overflow */ + BUG_ON(stop > PAGE_SIZE || stop < length); + + if (offset == 0 && stop == PAGE_SIZE) + while (!managed_cache_releasepage(page, GFP_NOFS)) + cond_resched(); +} + +static const struct address_space_operations managed_cache_aops = { + .releasepage = managed_cache_releasepage, + .invalidatepage = managed_cache_invalidatepage, +}; + +static struct inode *erofs_init_managed_cache(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + + if (unlikely(inode == NULL)) + return ERR_PTR(-ENOMEM); + + set_nlink(inode, 1); + inode->i_size = OFFSET_MAX; + + inode->i_mapping->a_ops = &managed_cache_aops; + mapping_set_gfp_mask(inode->i_mapping, + GFP_NOFS | __GFP_HIGHMEM | + __GFP_MOVABLE | __GFP_NOFAIL); + return inode; +} + +#endif + +static int erofs_read_super(struct super_block *sb, + const char *dev_name, void *data, int silent) +{ + struct inode *inode; + struct erofs_sb_info *sbi; + int err = -EINVAL; + + infoln("read_super, device -> %s", dev_name); + infoln("options -> %s", (char *)data); + + if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) { + errln("failed to set erofs blksize"); + goto err; + } + + sbi = kzalloc(sizeof(struct erofs_sb_info), GFP_KERNEL); + if (unlikely(sbi == NULL)) { + err = -ENOMEM; + goto err; + } + sb->s_fs_info = sbi; + + err = superblock_read(sb); + if (err) + goto err_sbread; + + sb->s_magic = EROFS_SUPER_MAGIC; + sb->s_flags |= MS_RDONLY | MS_NOATIME; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_time_gran = 1; + + sb->s_op = &erofs_sops; + +#ifdef CONFIG_EROFS_FS_XATTR + sb->s_xattr = erofs_xattr_handlers; +#endif + + /* set erofs default mount options */ + default_options(sbi); + + err = parse_options(sb, data); + if (err) + goto err_parseopt; + + if (!silent) + infoln("root inode @ nid %llu", ROOT_NID(sbi)); + +#ifdef CONFIG_EROFS_FS_ZIP + INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC); +#endif + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + sbi->managed_cache = erofs_init_managed_cache(sb); + if (IS_ERR(sbi->managed_cache)) { + err = PTR_ERR(sbi->managed_cache); + goto err_init_managed_cache; + } +#endif + + /* get the root inode */ + inode = erofs_iget(sb, ROOT_NID(sbi), true); + if 
(IS_ERR(inode)) { + err = PTR_ERR(inode); + goto err_iget; + } + + if (!S_ISDIR(inode->i_mode)) { + errln("rootino(nid %llu) is not a directory(i_mode %o)", + ROOT_NID(sbi), inode->i_mode); + err = -EINVAL; + goto err_isdir; + } + + sb->s_root = d_make_root(inode); + if (sb->s_root == NULL) { + err = -ENOMEM; + goto err_makeroot; + } + + /* save the device name to sbi */ + sbi->dev_name = __getname(); + if (sbi->dev_name == NULL) { + err = -ENOMEM; + goto err_devname; + } + + snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name); + sbi->dev_name[PATH_MAX - 1] = '\0'; + + erofs_register_super(sb); + + /* + * We already have a positive dentry, which was instantiated + * by d_make_root. Just need to d_rehash it. + */ + d_rehash(sb->s_root); + + if (!silent) + infoln("mounted on %s with opts: %s.", dev_name, + (char *)data); + return 0; + /* + * please add a label for each exit point and use + * the following name convention, thus new features + * can be integrated easily without renaming labels. + */ +err_devname: + dput(sb->s_root); +err_makeroot: +err_isdir: + if (sb->s_root == NULL) + iput(inode); +err_iget: +#ifdef EROFS_FS_HAS_MANAGED_CACHE + iput(sbi->managed_cache); +err_init_managed_cache: +#endif +err_parseopt: +err_sbread: + sb->s_fs_info = NULL; + kfree(sbi); +err: + return err; +} + +/* + * could be triggered after deactivate_locked_super() + * is called, thus including umount and failed to initialize. + */ +static void erofs_put_super(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + /* for cases which are failed in "read_super" */ + if (sbi == NULL) + return; + + WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); + + infoln("unmounted for %s", sbi->dev_name); + __putname(sbi->dev_name); + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + iput(sbi->managed_cache); +#endif + + mutex_lock(&sbi->umount_mutex); + +#ifdef CONFIG_EROFS_FS_ZIP + erofs_workstation_cleanup_all(sb); +#endif + + erofs_unregister_super(sb); + mutex_unlock(&sbi->umount_mutex); + + kfree(sbi); + sb->s_fs_info = NULL; +} + + +struct erofs_mount_private { + const char *dev_name; + char *options; +}; + +/* support mount_bdev() with options */ +static int erofs_fill_super(struct super_block *sb, + void *_priv, int silent) +{ + struct erofs_mount_private *priv = _priv; + + return erofs_read_super(sb, priv->dev_name, + priv->options, silent); +} + +static struct dentry *erofs_mount( + struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + struct erofs_mount_private priv = { + .dev_name = dev_name, + .options = data + }; + + return mount_bdev(fs_type, flags, dev_name, + &priv, erofs_fill_super); +} + +static void erofs_kill_sb(struct super_block *sb) +{ + kill_block_super(sb); +} + +static struct shrinker erofs_shrinker_info = { + .scan_objects = erofs_shrink_scan, + .count_objects = erofs_shrink_count, + .seeks = DEFAULT_SEEKS, +}; + +static struct file_system_type erofs_fs_type = { + .owner = THIS_MODULE, + .name = "erofs", + .mount = erofs_mount, + .kill_sb = erofs_kill_sb, + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("erofs"); + +#ifdef CONFIG_EROFS_FS_ZIP +extern int z_erofs_init_zip_subsystem(void); +extern void z_erofs_exit_zip_subsystem(void); +#endif + +static int __init erofs_module_init(void) +{ + int err; + + erofs_check_ondisk_layout_definitions(); + infoln("initializing erofs " EROFS_VERSION); + + err = erofs_init_inode_cache(); + if (err) + goto icache_err; + + err = register_shrinker(&erofs_shrinker_info); + if (err) + goto shrinker_err; + +#ifdef 
CONFIG_EROFS_FS_ZIP + err = z_erofs_init_zip_subsystem(); + if (err) + goto zip_err; +#endif + + err = register_filesystem(&erofs_fs_type); + if (err) + goto fs_err; + + infoln("successfully to initialize erofs"); + return 0; + +fs_err: +#ifdef CONFIG_EROFS_FS_ZIP + z_erofs_exit_zip_subsystem(); +zip_err: +#endif + unregister_shrinker(&erofs_shrinker_info); +shrinker_err: + erofs_exit_inode_cache(); +icache_err: + return err; +} + +static void __exit erofs_module_exit(void) +{ + unregister_filesystem(&erofs_fs_type); +#ifdef CONFIG_EROFS_FS_ZIP + z_erofs_exit_zip_subsystem(); +#endif + unregister_shrinker(&erofs_shrinker_info); + erofs_exit_inode_cache(); + infoln("successfully finalize erofs"); +} + +/* get filesystem statistics */ +static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct erofs_sb_info *sbi = EROFS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + + buf->f_type = sb->s_magic; + buf->f_bsize = EROFS_BLKSIZ; + buf->f_blocks = sbi->blocks; + buf->f_bfree = buf->f_bavail = 0; + + buf->f_files = ULLONG_MAX; + buf->f_ffree = ULLONG_MAX - sbi->inos; + + buf->f_namelen = EROFS_NAME_LEN; + + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + return 0; +} + +static int erofs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb); + +#ifdef CONFIG_EROFS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); +#endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif +#ifdef CONFIG_EROFS_FAULT_INJECTION + if (test_opt(sbi, FAULT_INJECTION)) + seq_printf(seq, ",fault_injection=%u", + sbi->fault_info.inject_rate); +#endif + return 0; +} + +static int erofs_remount(struct super_block *sb, int *flags, char *data) +{ + BUG_ON(!sb_rdonly(sb)); + + *flags |= MS_RDONLY; + return 0; +} + +const struct super_operations erofs_sops = { + .put_super = erofs_put_super, + .alloc_inode = alloc_inode, + .destroy_inode = destroy_inode, + .statfs = erofs_statfs, + .show_options = erofs_show_options, + .remount_fs = erofs_remount, +}; + +module_init(erofs_module_init); +module_exit(erofs_module_exit); + +MODULE_DESCRIPTION("Enhanced ROM File System"); +MODULE_AUTHOR("Gao Xiang, Yu Chao, Miao Xie, CONSUMER BG, HUAWEI Inc."); +MODULE_LICENSE("GPL"); + diff --git a/drivers/staging/erofs/unzip_lz4.c b/drivers/staging/erofs/unzip_lz4.c new file mode 100644 index 000000000000..b1ea23f66c4e --- /dev/null +++ b/drivers/staging/erofs/unzip_lz4.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * linux/drivers/staging/erofs/unzip_lz4.c + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * Original code taken from 'linux/lib/lz4/lz4_decompress.c' + */ + +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011 - 2016, Yann Collet. + * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 + * + * Changed for kernel usage by: + * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> + */ +#include "internal.h" +#include <asm/unaligned.h> +#include "lz4defs.h" + +/* + * no public solution to solve our requirement yet. + * see: <required buffer size for LZ4_decompress_safe_partial> + * https://groups.google.com/forum/#!topic/lz4c/_3kkz5N6n00 + */ +static FORCE_INLINE int customized_lz4_decompress_safe_partial( + const void * const source, + void * const dest, + int inputSize, + int outputSize) +{ + /* Local Variables */ + const BYTE *ip = (const BYTE *) source; + const BYTE * const iend = ip + inputSize; + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + outputSize; + BYTE *cpy; + + static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; + static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + + /* Empty output buffer */ + if (unlikely(outputSize == 0)) + return ((inputSize == 1) && (*ip == 0)) ? 
0 : -1; + + /* Main Loop : decode sequences */ + while (1) { + size_t length; + const BYTE *match; + size_t offset; + + /* get literal length */ + unsigned int const token = *ip++; + + length = token>>ML_BITS; + + if (length == RUN_MASK) { + unsigned int s; + + do { + s = *ip++; + length += s; + } while ((ip < iend - RUN_MASK) & (s == 255)); + + if (unlikely((size_t)(op + length) < (size_t)(op))) { + /* overflow detection */ + goto _output_error; + } + if (unlikely((size_t)(ip + length) < (size_t)(ip))) { + /* overflow detection */ + goto _output_error; + } + } + + /* copy literals */ + cpy = op + length; + if ((cpy > oend - WILDCOPYLENGTH) || + (ip + length > iend - (2 + 1 + LASTLITERALS))) { + if (cpy > oend) { + memcpy(op, ip, length = oend - op); + op += length; + break; + } + + if (unlikely(ip + length > iend)) { + /* + * Error : + * read attempt beyond + * end of input buffer + */ + goto _output_error; + } + + memcpy(op, ip, length); + ip += length; + op += length; + + if (ip > iend - 2) + break; + /* Necessarily EOF, due to parsing restrictions */ + /* break; */ + } else { + LZ4_wildCopy(op, ip, cpy); + ip += length; + op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + + if (unlikely(match < (const BYTE *)dest)) { + /* Error : offset outside buffers */ + goto _output_error; + } + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + unsigned int s; + + do { + s = *ip++; + + if (ip > iend - LASTLITERALS) + goto _output_error; + + length += s; + } while (s == 255); + + if (unlikely((size_t)(op + length) < (size_t)op)) { + /* overflow detection */ + goto _output_error; + } + } + + length += MINMATCH; + + /* copy match within block */ + cpy = op + length; + + if (unlikely(cpy >= oend - WILDCOPYLENGTH)) { + if (cpy >= oend) { + while (op < oend) + *op++ = *match++; + break; + } + goto __match; + } + + /* costs ~1%; silence an msan warning when offset == 0 */ + LZ4_write32(op, (U32)offset); + + if (unlikely(offset < 8)) { + const int dec64 = dec64table[offset]; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[offset]; + memcpy(op + 4, match, 4); + match -= dec64; + } else { + LZ4_copy8(op, match); + match += 8; + } + + op += 8; + + if (unlikely(cpy > oend - 12)) { + BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); + + if (op < oCopyLimit) { + LZ4_wildCopy(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } +__match: + while (op < cpy) + *op++ = *match++; + } else { + LZ4_copy8(op, match); + + if (length > 16) + LZ4_wildCopy(op + 8, match + 8, cpy); + } + + op = cpy; /* correction */ + } + DBG_BUGON((void *)ip - source > inputSize); + DBG_BUGON((void *)op - dest > outputSize); + + /* Nb of output bytes decoded */ + return (int) ((void *)op - dest); + + /* Overflow error detected */ +_output_error: + return -ERANGE; +} + +int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen) +{ + int ret = customized_lz4_decompress_safe_partial(in, + out, inlen, outlen); + + if (ret >= 0) + return ret; + + /* + * LZ4_decompress_safe will return an error code + * (< 0) if decompression failed + */ + errln("%s, failed to decompress, in[%p, %zu] outlen[%p, %zu]", + __func__, in, inlen, out, outlen); + WARN_ON(1); + print_hex_dump(KERN_DEBUG, "raw data [in]: ", DUMP_PREFIX_OFFSET, + 16, 1, in, inlen, true); + print_hex_dump(KERN_DEBUG, "raw data [out]: ", DUMP_PREFIX_OFFSET, + 16, 1, out, outlen, true); + return -EIO; +} + diff --git 
a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h new file mode 100644 index 000000000000..0956615b86f7 --- /dev/null +++ b/drivers/staging/erofs/unzip_pagevec.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * linux/drivers/staging/erofs/unzip_pagevec.h + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#ifndef __EROFS_UNZIP_PAGEVEC_H +#define __EROFS_UNZIP_PAGEVEC_H + +#include <linux/tagptr.h> + +/* page type in pagevec for unzip subsystem */ +enum z_erofs_page_type { + /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */ + Z_EROFS_PAGE_TYPE_EXCLUSIVE, + + Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED, + + Z_EROFS_VLE_PAGE_TYPE_HEAD, + Z_EROFS_VLE_PAGE_TYPE_MAX +}; + +extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0") + __bad_page_type_exclusive(void); + +/* pagevec tagged pointer */ +typedef tagptr2_t erofs_vtptr_t; + +/* pagevec collector */ +struct z_erofs_pagevec_ctor { + struct page *curr, *next; + erofs_vtptr_t *pages; + + unsigned int nr, index; +}; + +static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor, + bool atomic) +{ + if (ctor->curr == NULL) + return; + + if (atomic) + kunmap_atomic(ctor->pages); + else + kunmap(ctor->curr); +} + +static inline struct page * +z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor, + unsigned nr) +{ + unsigned index; + + /* keep away from occupied pages */ + if (ctor->next != NULL) + return ctor->next; + + for (index = 0; index < nr; ++index) { + const erofs_vtptr_t t = ctor->pages[index]; + const unsigned tags = tagptr_unfold_tags(t); + + if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE) + return tagptr_unfold_ptr(t); + } + + if (unlikely(nr >= ctor->nr)) + BUG(); + + return NULL; +} + +static inline void +z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor, + bool atomic) +{ + struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr); + + z_erofs_pagevec_ctor_exit(ctor, atomic); + + ctor->curr = next; + ctor->next = NULL; + ctor->pages = atomic ? 
+ kmap_atomic(ctor->curr) : kmap(ctor->curr); + + ctor->nr = PAGE_SIZE / sizeof(struct page *); + ctor->index = 0; +} + +static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, + unsigned nr, + erofs_vtptr_t *pages, unsigned i) +{ + ctor->nr = nr; + ctor->curr = ctor->next = NULL; + ctor->pages = pages; + + if (i >= nr) { + i -= nr; + z_erofs_pagevec_ctor_pagedown(ctor, false); + while (i > ctor->nr) { + i -= ctor->nr; + z_erofs_pagevec_ctor_pagedown(ctor, false); + } + } + + ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i); + ctor->index = i; +} + +static inline bool +z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor, + struct page *page, + enum z_erofs_page_type type, + bool *occupied) +{ + *occupied = false; + if (unlikely(ctor->next == NULL && type)) + if (ctor->index + 1 == ctor->nr) + return false; + + if (unlikely(ctor->index >= ctor->nr)) + z_erofs_pagevec_ctor_pagedown(ctor, false); + + /* exclusive page type must be 0 */ + if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL) + __bad_page_type_exclusive(); + + /* should remind that collector->next never equal to 1, 2 */ + if (type == (uintptr_t)ctor->next) { + ctor->next = page; + *occupied = true; + } + + ctor->pages[ctor->index++] = + tagptr_fold(erofs_vtptr_t, page, type); + return true; +} + +static inline struct page * +z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor, + enum z_erofs_page_type *type) +{ + erofs_vtptr_t t; + + if (unlikely(ctor->index >= ctor->nr)) { + BUG_ON(ctor->next == NULL); + z_erofs_pagevec_ctor_pagedown(ctor, true); + } + + t = ctor->pages[ctor->index]; + + *type = tagptr_unfold_tags(t); + + /* should remind that collector->next never equal to 1, 2 */ + if (*type == (uintptr_t)ctor->next) + ctor->next = tagptr_unfold_ptr(t); + + ctor->pages[ctor->index++] = + tagptr_fold(erofs_vtptr_t, NULL, 0); + + return tagptr_unfold_ptr(t); +} + +#endif + diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c new file mode 100644 index 000000000000..8721f0a41d15 --- /dev/null +++ b/drivers/staging/erofs/unzip_vle.c @@ -0,0 +1,1656 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/unzip_vle.c + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include "unzip_vle.h" +#include <linux/prefetch.h> + +static struct workqueue_struct *z_erofs_workqueue __read_mostly; +static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly; + +void z_erofs_exit_zip_subsystem(void) +{ + BUG_ON(z_erofs_workqueue == NULL); + BUG_ON(z_erofs_workgroup_cachep == NULL); + + destroy_workqueue(z_erofs_workqueue); + kmem_cache_destroy(z_erofs_workgroup_cachep); +} + +static inline int init_unzip_workqueue(void) +{ + const unsigned onlinecpus = num_possible_cpus(); + + /* + * we don't need too many threads, limiting threads + * could improve scheduling performance. + */ + z_erofs_workqueue = alloc_workqueue("erofs_unzipd", + WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + onlinecpus + onlinecpus / 4); + + return z_erofs_workqueue != NULL ? 
0 : -ENOMEM; +} + +int z_erofs_init_zip_subsystem(void) +{ + z_erofs_workgroup_cachep = + kmem_cache_create("erofs_compress", + Z_EROFS_WORKGROUP_SIZE, 0, + SLAB_RECLAIM_ACCOUNT, NULL); + + if (z_erofs_workgroup_cachep != NULL) { + if (!init_unzip_workqueue()) + return 0; + + kmem_cache_destroy(z_erofs_workgroup_cachep); + } + return -ENOMEM; +} + +enum z_erofs_vle_work_role { + Z_EROFS_VLE_WORK_SECONDARY, + Z_EROFS_VLE_WORK_PRIMARY, + /* + * The current work has at least been linked with the following + * processed chained works, which means if the processing page + * is the tail partial page of the work, the current work can + * safely use the whole page, as illustrated below: + * +--------------+-------------------------------------------+ + * | tail page | head page (of the previous work) | + * +--------------+-------------------------------------------+ + * /\ which belongs to the current work + * [ (*) this page can be used for the current work itself. ] + */ + Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, + Z_EROFS_VLE_WORK_MAX +}; + +struct z_erofs_vle_work_builder { + enum z_erofs_vle_work_role role; + /* + * 'hosted = false' means that the current workgroup doesn't belong to + * the owned chained workgroups. In the other words, it is none of our + * business to submit this workgroup. + */ + bool hosted; + + struct z_erofs_vle_workgroup *grp; + struct z_erofs_vle_work *work; + struct z_erofs_pagevec_ctor vector; + + /* pages used for reading the compressed data */ + struct page **compressed_pages; + unsigned compressed_deficit; +}; + +#define VLE_WORK_BUILDER_INIT() \ + { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED } + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + +static bool grab_managed_cache_pages(struct address_space *mapping, + erofs_blk_t start, + struct page **compressed_pages, + int clusterblks, + bool reserve_allocation) +{ + bool noio = true; + unsigned int i; + + /* TODO: optimize by introducing find_get_pages_range */ + for (i = 0; i < clusterblks; ++i) { + struct page *page, *found; + + if (READ_ONCE(compressed_pages[i]) != NULL) + continue; + + page = found = find_get_page(mapping, start + i); + if (found == NULL) { + noio = false; + if (!reserve_allocation) + continue; + page = EROFS_UNALLOCATED_CACHED_PAGE; + } + + if (NULL == cmpxchg(compressed_pages + i, NULL, page)) + continue; + + if (found != NULL) + put_page(found); + } + return noio; +} + +/* called by erofs_shrinker to get rid of all compressed_pages */ +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp) +{ + struct z_erofs_vle_workgroup *const grp = + container_of(egrp, struct z_erofs_vle_workgroup, obj); + struct address_space *const mapping = sbi->managed_cache->i_mapping; + const int clusterpages = erofs_clusterpages(sbi); + int i; + + /* + * refcount of workgroup is now freezed as 1, + * therefore no need to worry about available decompression users. 
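+	 * (it was frozen via erofs_workgroup_try_to_freeze() on the
+	 * shrink path before this function is called)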
+ */ + for (i = 0; i < clusterpages; ++i) { + struct page *page = grp->compressed_pages[i]; + + if (page == NULL || page->mapping != mapping) + continue; + + /* block other users from reclaiming or migrating the page */ + if (!trylock_page(page)) + return -EBUSY; + + /* barrier is implied in the following 'unlock_page' */ + WRITE_ONCE(grp->compressed_pages[i], NULL); + + set_page_private(page, 0); + ClearPagePrivate(page); + + unlock_page(page); + put_page(page); + } + return 0; +} + +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page) +{ + struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb); + const unsigned int clusterpages = erofs_clusterpages(sbi); + + struct z_erofs_vle_workgroup *grp; + int ret = 0; /* 0 - busy */ + + /* prevent the workgroup from being freed */ + rcu_read_lock(); + grp = (void *)page_private(page); + + if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) { + unsigned int i; + + for (i = 0; i < clusterpages; ++i) { + if (grp->compressed_pages[i] == page) { + WRITE_ONCE(grp->compressed_pages[i], NULL); + ret = 1; + break; + } + } + erofs_workgroup_unfreeze(&grp->obj, 1); + } + rcu_read_unlock(); + + if (ret) { + ClearPagePrivate(page); + put_page(page); + } + return ret; +} +#endif + +/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ +static inline bool try_to_reuse_as_compressed_page( + struct z_erofs_vle_work_builder *b, + struct page *page) +{ + while (b->compressed_deficit) { + --b->compressed_deficit; + if (NULL == cmpxchg(b->compressed_pages++, NULL, page)) + return true; + } + + return false; +} + +/* callers must be with work->lock held */ +static int z_erofs_vle_work_add_page( + struct z_erofs_vle_work_builder *builder, + struct page *page, + enum z_erofs_page_type type) +{ + int ret; + bool occupied; + + /* give priority for the compressed data storage */ + if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY && + type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && + try_to_reuse_as_compressed_page(builder, page)) + return 0; + + ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, + page, type, &occupied); + builder->work->vcnt += (unsigned)ret; + + return ret ? 0 : -EAGAIN; +} + +static inline bool try_to_claim_workgroup( + struct z_erofs_vle_workgroup *grp, + z_erofs_vle_owned_workgrp_t *owned_head, + bool *hosted) +{ + DBG_BUGON(*hosted == true); + + /* let's claim these following types of workgroup */ +retry: + if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) { + /* type 1, nil workgroup */ + if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next, + Z_EROFS_VLE_WORKGRP_NIL, *owned_head)) + goto retry; + + *owned_head = grp; + *hosted = true; + } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) { + /* + * type 2, link to the end of a existing open chain, + * be careful that its submission itself is governed + * by the original owned chain. 
+ */ + if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next, + Z_EROFS_VLE_WORKGRP_TAIL, *owned_head)) + goto retry; + + *owned_head = Z_EROFS_VLE_WORKGRP_TAIL; + } else + return false; /* :( better luck next time */ + + return true; /* lucky, I am the followee :) */ +} + +static struct z_erofs_vle_work * +z_erofs_vle_work_lookup(struct super_block *sb, + pgoff_t idx, unsigned pageofs, + struct z_erofs_vle_workgroup **grp_ret, + enum z_erofs_vle_work_role *role, + z_erofs_vle_owned_workgrp_t *owned_head, + bool *hosted) +{ + bool tag, primary; + struct erofs_workgroup *egrp; + struct z_erofs_vle_workgroup *grp; + struct z_erofs_vle_work *work; + + egrp = erofs_find_workgroup(sb, idx, &tag); + if (egrp == NULL) { + *grp_ret = NULL; + return NULL; + } + + *grp_ret = grp = container_of(egrp, + struct z_erofs_vle_workgroup, obj); + +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + work = z_erofs_vle_grab_work(grp, pageofs); + primary = true; +#else + BUG(); +#endif + + DBG_BUGON(work->pageofs != pageofs); + + /* + * lock must be taken first to avoid grp->next == NIL between + * claiming workgroup and adding pages: + * grp->next != NIL + * grp->next = NIL + * mutex_unlock_all + * mutex_lock(&work->lock) + * add all pages to pagevec + * + * [correct locking case 1]: + * mutex_lock(grp->work[a]) + * ... + * mutex_lock(grp->work[b]) mutex_lock(grp->work[c]) + * ... *role = SECONDARY + * add all pages to pagevec + * ... + * mutex_unlock(grp->work[c]) + * mutex_lock(grp->work[c]) + * ... + * grp->next = NIL + * mutex_unlock_all + * + * [correct locking case 2]: + * mutex_lock(grp->work[b]) + * ... + * mutex_lock(grp->work[a]) + * ... + * mutex_lock(grp->work[c]) + * ... + * grp->next = NIL + * mutex_unlock_all + * mutex_lock(grp->work[a]) + * *role = PRIMARY_OWNER + * add all pages to pagevec + * ... + */ + mutex_lock(&work->lock); + + *hosted = false; + if (!primary) + *role = Z_EROFS_VLE_WORK_SECONDARY; + /* claim the workgroup if possible */ + else if (try_to_claim_workgroup(grp, owned_head, hosted)) + *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; + else + *role = Z_EROFS_VLE_WORK_PRIMARY; + + return work; +} + +static struct z_erofs_vle_work * +z_erofs_vle_work_register(struct super_block *sb, + struct z_erofs_vle_workgroup **grp_ret, + struct erofs_map_blocks *map, + pgoff_t index, unsigned pageofs, + enum z_erofs_vle_work_role *role, + z_erofs_vle_owned_workgrp_t *owned_head, + bool *hosted) +{ + bool newgrp = false; + struct z_erofs_vle_workgroup *grp = *grp_ret; + struct z_erofs_vle_work *work; + +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + BUG_ON(grp != NULL); +#else + if (grp != NULL) + goto skip; +#endif + /* no available workgroup, let's allocate one */ + grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS); + if (unlikely(grp == NULL)) + return ERR_PTR(-ENOMEM); + + grp->obj.index = index; + grp->llen = map->m_llen; + + z_erofs_vle_set_workgrp_fmt(grp, + (map->m_flags & EROFS_MAP_ZIPPED) ? 
+ Z_EROFS_VLE_WORKGRP_FMT_LZ4 : + Z_EROFS_VLE_WORKGRP_FMT_PLAIN); + atomic_set(&grp->obj.refcount, 1); + + /* new workgrps have been claimed as type 1 */ + WRITE_ONCE(grp->next, *owned_head); + /* primary and followed work for all new workgrps */ + *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; + /* it should be submitted by ourselves */ + *hosted = true; + + newgrp = true; +#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF +skip: + /* currently unimplemented */ + BUG(); +#else + work = z_erofs_vle_grab_primary_work(grp); +#endif + work->pageofs = pageofs; + + mutex_init(&work->lock); + + if (newgrp) { + int err = erofs_register_workgroup(sb, &grp->obj, 0); + + if (err) { + kmem_cache_free(z_erofs_workgroup_cachep, grp); + return ERR_PTR(-EAGAIN); + } + } + + *owned_head = *grp_ret = grp; + + mutex_lock(&work->lock); + return work; +} + +static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp, + unsigned int llen) +{ + while (1) { + unsigned int orig_llen = grp->llen; + + if (orig_llen >= llen || orig_llen == + cmpxchg(&grp->llen, orig_llen, llen)) + break; + } +} + +#define builder_is_followed(builder) \ + ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) + +static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder, + struct super_block *sb, + struct erofs_map_blocks *map, + z_erofs_vle_owned_workgrp_t *owned_head) +{ + const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb)); + const erofs_blk_t index = erofs_blknr(map->m_pa); + const unsigned pageofs = map->m_la & ~PAGE_MASK; + struct z_erofs_vle_workgroup *grp; + struct z_erofs_vle_work *work; + + DBG_BUGON(builder->work != NULL); + + /* must be Z_EROFS_WORK_TAIL or the next chained work */ + DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL); + DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); + + DBG_BUGON(erofs_blkoff(map->m_pa)); + +repeat: + work = z_erofs_vle_work_lookup(sb, index, + pageofs, &grp, &builder->role, owned_head, &builder->hosted); + if (work != NULL) { + __update_workgrp_llen(grp, map->m_llen); + goto got_it; + } + + work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs, + &builder->role, owned_head, &builder->hosted); + + if (unlikely(work == ERR_PTR(-EAGAIN))) + goto repeat; + + if (unlikely(IS_ERR(work))) + return PTR_ERR(work); +got_it: + z_erofs_pagevec_ctor_init(&builder->vector, + Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt); + + if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) { + /* enable possibly in-place decompression */ + builder->compressed_pages = grp->compressed_pages; + builder->compressed_deficit = clusterpages; + } else { + builder->compressed_pages = NULL; + builder->compressed_deficit = 0; + } + + builder->grp = grp; + builder->work = work; + return 0; +} + +/* + * keep in mind that no referenced workgroups will be freed + * only after a RCU grace period, so rcu_read_lock() could + * prevent a workgroup from being freed. 
+ */ +static void z_erofs_rcu_callback(struct rcu_head *head) +{ + struct z_erofs_vle_work *work = container_of(head, + struct z_erofs_vle_work, rcu); + struct z_erofs_vle_workgroup *grp = + z_erofs_vle_work_workgroup(work, true); + + kmem_cache_free(z_erofs_workgroup_cachep, grp); +} + +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) +{ + struct z_erofs_vle_workgroup *const vgrp = container_of(grp, + struct z_erofs_vle_workgroup, obj); + struct z_erofs_vle_work *const work = &vgrp->work; + + call_rcu(&work->rcu, z_erofs_rcu_callback); +} + +static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp, + struct z_erofs_vle_work *work __maybe_unused) +{ + erofs_workgroup_put(&grp->obj); +} + +void z_erofs_vle_work_release(struct z_erofs_vle_work *work) +{ + struct z_erofs_vle_workgroup *grp = + z_erofs_vle_work_workgroup(work, true); + + __z_erofs_vle_work_release(grp, work); +} + +static inline bool +z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder) +{ + struct z_erofs_vle_work *work = builder->work; + + if (work == NULL) + return false; + + z_erofs_pagevec_ctor_exit(&builder->vector, false); + mutex_unlock(&work->lock); + + /* + * if all pending pages are added, don't hold work reference + * any longer if the current work isn't hosted by ourselves. + */ + if (!builder->hosted) + __z_erofs_vle_work_release(builder->grp, work); + + builder->work = NULL; + builder->grp = NULL; + return true; +} + +static inline struct page *__stagingpage_alloc(struct list_head *pagepool, + gfp_t gfp) +{ + struct page *page = erofs_allocpage(pagepool, gfp); + + if (unlikely(page == NULL)) + return NULL; + + page->mapping = Z_EROFS_MAPPING_STAGING; + return page; +} + +struct z_erofs_vle_frontend { + struct inode *const inode; + + struct z_erofs_vle_work_builder builder; + struct erofs_map_blocks_iter m_iter; + + z_erofs_vle_owned_workgrp_t owned_head; + + bool initial; +#if (EROFS_FS_ZIP_CACHE_LVL >= 2) + erofs_off_t cachedzone_la; +#endif +}; + +#define VLE_FRONTEND_INIT(__i) { \ + .inode = __i, \ + .m_iter = { \ + { .m_llen = 0, .m_plen = 0 }, \ + .mpage = NULL \ + }, \ + .builder = VLE_WORK_BUILDER_INIT(), \ + .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \ + .initial = true, } + +static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, + struct page *page, + struct list_head *page_pool) +{ + struct super_block *const sb = fe->inode->i_sb; + struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb); + struct erofs_map_blocks_iter *const m = &fe->m_iter; + struct erofs_map_blocks *const map = &m->map; + struct z_erofs_vle_work_builder *const builder = &fe->builder; + const loff_t offset = page_offset(page); + + bool tight = builder_is_followed(builder); + struct z_erofs_vle_work *work = builder->work; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + struct address_space *const mngda = sbi->managed_cache->i_mapping; + struct z_erofs_vle_workgroup *grp; + bool noio_outoforder; +#endif + + enum z_erofs_page_type page_type; + unsigned cur, end, spiltted, index; + int err; + + /* register locked file pages as online pages in pack */ + z_erofs_onlinepage_init(page); + + spiltted = 0; + end = PAGE_SIZE; +repeat: + cur = end - 1; + + /* lucky, within the range of the current map_blocks */ + if (offset + cur >= map->m_la && + offset + cur < map->m_la + map->m_llen) + goto hitted; + + /* go ahead the next map_blocks */ + debugln("%s: [out-of-range] pos %llu", __func__, offset + cur); + + if (!z_erofs_vle_work_iter_end(builder)) + fe->initial = false; + + map->m_la = offset + cur; 
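+	/* the lookup below fills "map" with the extent covering this m_la */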
+ map->m_llen = 0; + err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0); + if (unlikely(err)) + goto err_out; + + /* deal with hole (FIXME! broken now) */ + if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) + goto hitted; + + DBG_BUGON(map->m_plen != 1 << sbi->clusterbits); + BUG_ON(erofs_blkoff(map->m_pa)); + + err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head); + if (unlikely(err)) + goto err_out; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + grp = fe->builder.grp; + + /* let's do out-of-order decompression for noio */ + noio_outoforder = grab_managed_cache_pages(mngda, + erofs_blknr(map->m_pa), + grp->compressed_pages, erofs_blknr(map->m_plen), + /* compressed page caching selection strategy */ + fe->initial | (EROFS_FS_ZIP_CACHE_LVL >= 2 ? + map->m_la < fe->cachedzone_la : 0)); + + if (noio_outoforder && builder_is_followed(builder)) + builder->role = Z_EROFS_VLE_WORK_PRIMARY; +#endif + + tight &= builder_is_followed(builder); + work = builder->work; +hitted: + cur = end - min_t(unsigned, offset + end - map->m_la, end); + if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) { + zero_user_segment(page, cur, end); + goto next_part; + } + + /* let's derive page type */ + page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD : + (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : + (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : + Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); + +retry: + err = z_erofs_vle_work_add_page(builder, page, page_type); + /* should allocate an additional staging page for pagevec */ + if (err == -EAGAIN) { + struct page *const newpage = + __stagingpage_alloc(page_pool, GFP_NOFS); + + err = z_erofs_vle_work_add_page(builder, + newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE); + if (!err) + goto retry; + } + + if (unlikely(err)) + goto err_out; + + index = page->index - map->m_la / PAGE_SIZE; + + /* FIXME! avoid the last relundant fixup & endio */ + z_erofs_onlinepage_fixup(page, index, true); + ++spiltted; + + /* also update nr_pages and increase queued_pages */ + work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1); +next_part: + /* can be used for verification */ + map->m_llen = offset + cur - map->m_la; + + end = cur; + if (end > 0) + goto repeat; + + /* FIXME! 
avoid the last relundant fixup & endio */ + z_erofs_onlinepage_endio(page); + + debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu", + __func__, page, spiltted, map->m_llen); + return 0; + +err_out: + /* TODO: the missing error handing cases */ + return err; +} + +static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) +{ + tagptr1_t t = tagptr_init(tagptr1_t, ptr); + struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t); + bool background = tagptr_unfold_tags(t); + + if (atomic_add_return(bios, &io->pending_bios)) + return; + + if (background) + queue_work(z_erofs_workqueue, &io->u.work); + else + wake_up(&io->u.wait); +} + +static inline void z_erofs_vle_read_endio(struct bio *bio) +{ + const blk_status_t err = bio->bi_status; + unsigned i; + struct bio_vec *bvec; +#ifdef EROFS_FS_HAS_MANAGED_CACHE + struct address_space *mngda = NULL; +#endif + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + bool cachemngd = false; + + DBG_BUGON(PageUptodate(page)); + BUG_ON(page->mapping == NULL); + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (unlikely(mngda == NULL && !z_erofs_is_stagingpage(page))) { + struct inode *const inode = page->mapping->host; + struct super_block *const sb = inode->i_sb; + + mngda = EROFS_SB(sb)->managed_cache->i_mapping; + } + + /* + * If mngda has not gotten, it equals NULL, + * however, page->mapping never be NULL if working properly. + */ + cachemngd = (page->mapping == mngda); +#endif + + if (unlikely(err)) + SetPageError(page); + else if (cachemngd) + SetPageUptodate(page); + + if (cachemngd) + unlock_page(page); + } + + z_erofs_vle_unzip_kickoff(bio->bi_private, -1); + bio_put(bio); +} + +static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES]; +static DEFINE_MUTEX(z_pagemap_global_lock); + +static int z_erofs_vle_unzip(struct super_block *sb, + struct z_erofs_vle_workgroup *grp, + struct list_head *page_pool) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); +#ifdef EROFS_FS_HAS_MANAGED_CACHE + struct address_space *const mngda = sbi->managed_cache->i_mapping; +#endif + const unsigned clusterpages = erofs_clusterpages(sbi); + + struct z_erofs_pagevec_ctor ctor; + unsigned nr_pages; +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + unsigned sparsemem_pages = 0; +#endif + struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES]; + struct page **pages, **compressed_pages, *page; + unsigned i, llen; + + enum z_erofs_page_type page_type; + bool overlapped; + struct z_erofs_vle_work *work; + void *vout; + int err; + + might_sleep(); +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + work = z_erofs_vle_grab_primary_work(grp); +#else + BUG(); +#endif + BUG_ON(!READ_ONCE(work->nr_pages)); + + mutex_lock(&work->lock); + nr_pages = work->nr_pages; + + if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES)) + pages = pages_onstack; + else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES && + mutex_trylock(&z_pagemap_global_lock)) + pages = z_pagemap_global; + else { +repeat: + pages = kvmalloc_array(nr_pages, + sizeof(struct page *), GFP_KERNEL); + + /* fallback to global pagemap for the lowmem scenario */ + if (unlikely(pages == NULL)) { + if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES) + goto repeat; + else { + mutex_lock(&z_pagemap_global_lock); + pages = z_pagemap_global; + } + } + } + + for (i = 0; i < nr_pages; ++i) + pages[i] = NULL; + + z_erofs_pagevec_ctor_init(&ctor, + Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 0); + + for (i = 0; i < work->vcnt; ++i) { + unsigned pagenr; + + page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type); + + 
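+ /*
+ * Each queued page carries its sub-index within this work (kept in
+ * the onlinepage private word); it is used below as the slot in the
+ * decompressed output array pages[], with head pages pinned to slot 0.
+ */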
/* all pages in pagevec ought to be valid */ + DBG_BUGON(page == NULL); + DBG_BUGON(page->mapping == NULL); + + if (z_erofs_gather_if_stagingpage(page_pool, page)) + continue; + + if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD) + pagenr = 0; + else + pagenr = z_erofs_onlinepage_index(page); + + BUG_ON(pagenr >= nr_pages); + +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + BUG_ON(pages[pagenr] != NULL); + ++sparsemem_pages; +#endif + pages[pagenr] = page; + } + + z_erofs_pagevec_ctor_exit(&ctor, true); + + overlapped = false; + compressed_pages = grp->compressed_pages; + + for (i = 0; i < clusterpages; ++i) { + unsigned pagenr; + + page = compressed_pages[i]; + + /* all compressed pages ought to be valid */ + DBG_BUGON(page == NULL); + DBG_BUGON(page->mapping == NULL); + + if (z_erofs_is_stagingpage(page)) + continue; +#ifdef EROFS_FS_HAS_MANAGED_CACHE + else if (page->mapping == mngda) { + BUG_ON(PageLocked(page)); + BUG_ON(!PageUptodate(page)); + continue; + } +#endif + + /* only non-head page could be reused as a compressed page */ + pagenr = z_erofs_onlinepage_index(page); + + BUG_ON(pagenr >= nr_pages); +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + BUG_ON(pages[pagenr] != NULL); + ++sparsemem_pages; +#endif + pages[pagenr] = page; + + overlapped = true; + } + + llen = (nr_pages << PAGE_SHIFT) - work->pageofs; + + if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) { + /* FIXME! this should be fixed in the future */ + BUG_ON(grp->llen != llen); + + err = z_erofs_vle_plain_copy(compressed_pages, clusterpages, + pages, nr_pages, work->pageofs); + goto out; + } + + if (llen > grp->llen) + llen = grp->llen; + + err = z_erofs_vle_unzip_fast_percpu(compressed_pages, + clusterpages, pages, llen, work->pageofs, + z_erofs_onlinepage_endio); + if (err != -ENOTSUPP) + goto out_percpu; + +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF + if (sparsemem_pages >= nr_pages) { + BUG_ON(sparsemem_pages > nr_pages); + goto skip_allocpage; + } +#endif + + for (i = 0; i < nr_pages; ++i) { + if (pages[i] != NULL) + continue; + + pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS); + } + +#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF +skip_allocpage: +#endif + vout = erofs_vmap(pages, nr_pages); + + err = z_erofs_vle_unzip_vmap(compressed_pages, + clusterpages, vout, llen, work->pageofs, overlapped); + + erofs_vunmap(vout, nr_pages); + +out: + for (i = 0; i < nr_pages; ++i) { + page = pages[i]; + DBG_BUGON(page->mapping == NULL); + + /* recycle all individual staging pages */ + if (z_erofs_gather_if_stagingpage(page_pool, page)) + continue; + + if (unlikely(err < 0)) + SetPageError(page); + + z_erofs_onlinepage_endio(page); + } + +out_percpu: + for (i = 0; i < clusterpages; ++i) { + page = compressed_pages[i]; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (page->mapping == mngda) + continue; +#endif + /* recycle all individual staging pages */ + (void)z_erofs_gather_if_stagingpage(page_pool, page); + + WRITE_ONCE(compressed_pages[i], NULL); + } + + if (pages == z_pagemap_global) + mutex_unlock(&z_pagemap_global_lock); + else if (unlikely(pages != pages_onstack)) + kvfree(pages); + + work->nr_pages = 0; + work->vcnt = 0; + + /* all work locks MUST be taken before the following line */ + + WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL); + + /* all work locks SHOULD be released right now */ + mutex_unlock(&work->lock); + + z_erofs_vle_work_release(work); + return err; +} + +static void z_erofs_vle_unzip_all(struct super_block *sb, + struct z_erofs_vle_unzip_io *io, + struct list_head *page_pool) +{ + z_erofs_vle_owned_workgrp_t owned = 
io->head; + + while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) { + struct z_erofs_vle_workgroup *grp; + + /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */ + DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL); + + /* no possible that 'owned' equals NULL */ + DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL); + + grp = owned; + owned = READ_ONCE(grp->next); + + z_erofs_vle_unzip(sb, grp, page_pool); + } +} + +static void z_erofs_vle_unzip_wq(struct work_struct *work) +{ + struct z_erofs_vle_unzip_io_sb *iosb = container_of(work, + struct z_erofs_vle_unzip_io_sb, io.u.work); + LIST_HEAD(page_pool); + + BUG_ON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); + z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool); + + put_pages_list(&page_pool); + kvfree(iosb); +} + +static inline struct z_erofs_vle_unzip_io * +prepare_io_handler(struct super_block *sb, + struct z_erofs_vle_unzip_io *io, + bool background) +{ + struct z_erofs_vle_unzip_io_sb *iosb; + + if (!background) { + /* waitqueue available for foreground io */ + BUG_ON(io == NULL); + + init_waitqueue_head(&io->u.wait); + atomic_set(&io->pending_bios, 0); + goto out; + } + + if (io != NULL) + BUG(); + else { + /* allocate extra io descriptor for background io */ + iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb), + GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(iosb == NULL); + + io = &iosb->io; + } + + iosb->sb = sb; + INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq); +out: + io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED; + return io; +} + +#ifdef EROFS_FS_HAS_MANAGED_CACHE +/* true - unlocked (noio), false - locked (need submit io) */ +static inline bool recover_managed_page(struct z_erofs_vle_workgroup *grp, + struct page *page) +{ + wait_on_page_locked(page); + if (PagePrivate(page) && PageUptodate(page)) + return true; + + lock_page(page); + if (unlikely(!PagePrivate(page))) { + set_page_private(page, (unsigned long)grp); + SetPagePrivate(page); + } + if (unlikely(PageUptodate(page))) { + unlock_page(page); + return true; + } + return false; +} + +#define __FSIO_1 1 +#else +#define __FSIO_1 0 +#endif + +static bool z_erofs_vle_submit_all(struct super_block *sb, + z_erofs_vle_owned_workgrp_t owned_head, + struct list_head *pagepool, + struct z_erofs_vle_unzip_io *fg_io, + bool force_fg) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + const unsigned clusterpages = erofs_clusterpages(sbi); + const gfp_t gfp = GFP_NOFS; +#ifdef EROFS_FS_HAS_MANAGED_CACHE + struct address_space *const mngda = sbi->managed_cache->i_mapping; + struct z_erofs_vle_workgroup *lstgrp_noio = NULL, *lstgrp_io = NULL; +#endif + struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1]; + struct bio *bio; + tagptr1_t bi_private; + /* since bio will be NULL, no need to initialize last_index */ + pgoff_t uninitialized_var(last_index); + bool force_submit = false; + unsigned nr_bios; + + if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL)) + return false; + + /* + * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io + * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io + */ +#ifdef EROFS_FS_HAS_MANAGED_CACHE + ios[0] = prepare_io_handler(sb, fg_io + 0, false); +#endif + + if (force_fg) { + ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false); + bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0); + } else { + ios[__FSIO_1] = prepare_io_handler(sb, NULL, true); + bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1); + } + + nr_bios = 0; + force_submit = false; + bio = NULL; + + /* by default, all need io submission */ + 
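+ /*
+ * With the managed cache enabled, the owned chain is split while it
+ * is walked below: workgroups whose compressed pages are all already
+ * uptodate in the cache (no I/O needed) are relinked onto ios[0],
+ * while the remaining ones stay chained on ios[1] for bio submission.
+ */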
ios[__FSIO_1]->head = owned_head; + + do { + struct z_erofs_vle_workgroup *grp; + struct page **compressed_pages, *oldpage, *page; + pgoff_t first_index; + unsigned i = 0; +#ifdef EROFS_FS_HAS_MANAGED_CACHE + unsigned int noio = 0; + bool cachemngd; +#endif + int err; + + /* no possible 'owned_head' equals the following */ + DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); + DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL); + + grp = owned_head; + + /* close the main owned chain at first */ + owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, + Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); + + first_index = grp->obj.index; + compressed_pages = grp->compressed_pages; + + force_submit |= (first_index != last_index + 1); +repeat: + /* fulfill all compressed pages */ + oldpage = page = READ_ONCE(compressed_pages[i]); + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + cachemngd = false; + + if (page == EROFS_UNALLOCATED_CACHED_PAGE) { + cachemngd = true; + goto do_allocpage; + } else if (page != NULL) { + if (page->mapping != mngda) + BUG_ON(PageUptodate(page)); + else if (recover_managed_page(grp, page)) { + /* page is uptodate, skip io submission */ + force_submit = true; + ++noio; + goto skippage; + } + } else { +do_allocpage: +#else + if (page != NULL) + BUG_ON(PageUptodate(page)); + else { +#endif + page = __stagingpage_alloc(pagepool, gfp); + + if (oldpage != cmpxchg(compressed_pages + i, + oldpage, page)) { + list_add(&page->lru, pagepool); + goto repeat; +#ifdef EROFS_FS_HAS_MANAGED_CACHE + } else if (cachemngd && !add_to_page_cache_lru(page, + mngda, first_index + i, gfp)) { + set_page_private(page, (unsigned long)grp); + SetPagePrivate(page); +#endif + } + } + + if (bio != NULL && force_submit) { +submit_bio_retry: + __submit_bio(bio, REQ_OP_READ, 0); + bio = NULL; + } + + if (bio == NULL) { + bio = prepare_bio(sb, first_index + i, + BIO_MAX_PAGES, z_erofs_vle_read_endio); + bio->bi_private = tagptr_cast_ptr(bi_private); + + ++nr_bios; + } + + err = bio_add_page(bio, page, PAGE_SIZE, 0); + if (err < PAGE_SIZE) + goto submit_bio_retry; + + force_submit = false; + last_index = first_index + i; +#ifdef EROFS_FS_HAS_MANAGED_CACHE +skippage: +#endif + if (++i < clusterpages) + goto repeat; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (noio < clusterpages) { + lstgrp_io = grp; + } else { + z_erofs_vle_owned_workgrp_t iogrp_next = + owned_head == Z_EROFS_VLE_WORKGRP_TAIL ? 
+ Z_EROFS_VLE_WORKGRP_TAIL_CLOSED : + owned_head; + + if (lstgrp_io == NULL) + ios[1]->head = iogrp_next; + else + WRITE_ONCE(lstgrp_io->next, iogrp_next); + + if (lstgrp_noio == NULL) + ios[0]->head = grp; + else + WRITE_ONCE(lstgrp_noio->next, grp); + + lstgrp_noio = grp; + } +#endif + } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL); + + if (bio != NULL) + __submit_bio(bio, REQ_OP_READ, 0); + +#ifndef EROFS_FS_HAS_MANAGED_CACHE + BUG_ON(!nr_bios); +#else + if (lstgrp_noio != NULL) + WRITE_ONCE(lstgrp_noio->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); + + if (!force_fg && !nr_bios) { + kvfree(container_of(ios[1], + struct z_erofs_vle_unzip_io_sb, io)); + return true; + } +#endif + + z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios); + return true; +} + +static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f, + struct list_head *pagepool, + bool force_fg) +{ + struct super_block *sb = f->inode->i_sb; + struct z_erofs_vle_unzip_io io[1 + __FSIO_1]; + + if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg)) + return; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + z_erofs_vle_unzip_all(sb, &io[0], pagepool); +#endif + if (!force_fg) + return; + + /* wait until all bios are completed */ + wait_event(io[__FSIO_1].u.wait, + !atomic_read(&io[__FSIO_1].pending_bios)); + + /* let's synchronous decompression */ + z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool); +} + +static int z_erofs_vle_normalaccess_readpage(struct file *file, + struct page *page) +{ + struct inode *const inode = page->mapping->host; + struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); + int err; + LIST_HEAD(pagepool); + +#if (EROFS_FS_ZIP_CACHE_LVL >= 2) + f.cachedzone_la = page->index << PAGE_SHIFT; +#endif + err = z_erofs_do_read_page(&f, page, &pagepool); + (void)z_erofs_vle_work_iter_end(&f.builder); + + if (err) { + errln("%s, failed to read, err [%d]", __func__, err); + goto out; + } + + z_erofs_submit_and_unzip(&f, &pagepool, true); +out: + if (f.m_iter.mpage != NULL) + put_page(f.m_iter.mpage); + + /* clean up the remaining free pages */ + put_pages_list(&pagepool); + return 0; +} + +static inline int __z_erofs_vle_normalaccess_readpages( + struct file *filp, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, bool sync) +{ + struct inode *const inode = mapping->host; + + struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + struct page *head = NULL; + LIST_HEAD(pagepool); + +#if (EROFS_FS_ZIP_CACHE_LVL >= 2) + f.cachedzone_la = lru_to_page(pages)->index << PAGE_SHIFT; +#endif + for (; nr_pages; --nr_pages) { + struct page *page = lru_to_page(pages); + + prefetchw(&page->flags); + list_del(&page->lru); + + if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { + list_add(&page->lru, &pagepool); + continue; + } + + BUG_ON(PagePrivate(page)); + set_page_private(page, (unsigned long)head); + head = page; + } + + while (head != NULL) { + struct page *page = head; + int err; + + /* traversal in reverse order */ + head = (void *)page_private(page); + + err = z_erofs_do_read_page(&f, page, &pagepool); + if (err) { + struct erofs_vnode *vi = EROFS_V(inode); + + errln("%s, readahead error at page %lu of nid %llu", + __func__, page->index, vi->nid); + } + + put_page(page); + } + + (void)z_erofs_vle_work_iter_end(&f.builder); + + z_erofs_submit_and_unzip(&f, &pagepool, sync); + + if (f.m_iter.mpage != NULL) + put_page(f.m_iter.mpage); + + /* clean up the remaining free pages */ + 
put_pages_list(&pagepool); + return 0; +} + +static int z_erofs_vle_normalaccess_readpages( + struct file *filp, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return __z_erofs_vle_normalaccess_readpages(filp, + mapping, pages, nr_pages, + nr_pages < 4 /* sync */); +} + +const struct address_space_operations z_erofs_vle_normalaccess_aops = { + .readpage = z_erofs_vle_normalaccess_readpage, + .readpages = z_erofs_vle_normalaccess_readpages, +}; + +#define __vle_cluster_advise(x, bit, bits) \ + ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1)) + +#define __vle_cluster_type(advise) __vle_cluster_advise(advise, \ + Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) + +enum { + Z_EROFS_VLE_CLUSTER_TYPE_PLAIN, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD, + Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD, + Z_EROFS_VLE_CLUSTER_TYPE_RESERVED, + Z_EROFS_VLE_CLUSTER_TYPE_MAX +}; + +#define vle_cluster_type(di) \ + __vle_cluster_type((di)->di_advise) + +static inline unsigned +vle_compressed_index_clusterofs(unsigned clustersize, + struct z_erofs_vle_decompressed_index *di) +{ + debugln("%s, vle=%pK, advise=%x (type %u), clusterofs=%x blkaddr=%x", + __func__, di, di->di_advise, vle_cluster_type(di), + di->di_clusterofs, di->di_u.blkaddr); + + switch (vle_cluster_type(di)) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + break; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + return di->di_clusterofs; + default: + BUG_ON(1); + } + return clustersize; +} + +static inline erofs_blk_t +vle_extent_blkaddr(struct inode *inode, pgoff_t index) +{ + struct erofs_sb_info *sbi = EROFS_I_SB(inode); + struct erofs_vnode *vi = EROFS_V(inode); + + unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize + + vi->xattr_isize) + sizeof(struct erofs_extent_header) + + index * sizeof(struct z_erofs_vle_decompressed_index); + + return erofs_blknr(iloc(sbi, vi->nid) + ofs); +} + +static inline unsigned int +vle_extent_blkoff(struct inode *inode, pgoff_t index) +{ + struct erofs_sb_info *sbi = EROFS_I_SB(inode); + struct erofs_vnode *vi = EROFS_V(inode); + + unsigned ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize + + vi->xattr_isize) + sizeof(struct erofs_extent_header) + + index * sizeof(struct z_erofs_vle_decompressed_index); + + return erofs_blkoff(iloc(sbi, vi->nid) + ofs); +} + +/* + * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode + * --- + * VLE compression mode attempts to compress a number of logical data into + * a physical cluster with a fixed size. + * VLE compression mode uses "struct z_erofs_vle_decompressed_index". 
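+ *
+ * For example (a sketch, assuming 4KiB logical clusters, i.e.
+ * clusterbits = 12): a read at file offset 70000 falls into logical
+ * cluster lcn = 70000 >> 12 = 17, and the corresponding on-disk index
+ * lives at
+ *   Z_EROFS_VLE_EXTENT_ALIGN(inode_isize + xattr_isize) +
+ *   sizeof(struct erofs_extent_header) + 17 * sizeof(*di)
+ * bytes into the inode metadata; vle_extent_blkaddr()/vle_extent_blkoff()
+ * above split that offset into a block number and an in-block offset.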
+ */
+static erofs_off_t vle_get_logical_extent_head(
+ struct inode *inode,
+ struct page **page_iter,
+ void **kaddr_iter,
+ unsigned lcn, /* logical cluster number */
+ erofs_blk_t *pcn,
+ unsigned *flags)
+{
+ /* for extent meta */
+ struct page *page = *page_iter;
+ erofs_blk_t blkaddr = vle_extent_blkaddr(inode, lcn);
+ struct z_erofs_vle_decompressed_index *di;
+ unsigned long long ofs;
+ const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
+ const unsigned int clustersize = 1 << clusterbits;
+
+ if (page->index != blkaddr) {
+ kunmap_atomic(*kaddr_iter);
+ unlock_page(page);
+ put_page(page);
+
+ *page_iter = page = erofs_get_meta_page(inode->i_sb,
+ blkaddr, false);
+ *kaddr_iter = kmap_atomic(page);
+ }
+
+ di = *kaddr_iter + vle_extent_blkoff(inode, lcn);
+ switch (vle_cluster_type(di)) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ BUG_ON(!di->di_u.delta[0]);
+ BUG_ON(lcn < di->di_u.delta[0]);
+
+ ofs = vle_get_logical_extent_head(inode,
+ page_iter, kaddr_iter,
+ lcn - di->di_u.delta[0], pcn, flags);
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ *flags ^= EROFS_MAP_ZIPPED;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ /* clustersize should be a power of two */
+ ofs = ((unsigned long long)lcn << clusterbits) +
+ (le16_to_cpu(di->di_clusterofs) & (clustersize - 1));
+ *pcn = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ default:
+ BUG_ON(1);
+ }
+ return ofs;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ struct page **mpage_ret, int flags)
+{
+ /* logical extent (start, end) offset */
+ unsigned long long ofs, end;
+ struct z_erofs_vle_decompressed_index *di;
+ erofs_blk_t e_blkaddr, pcn;
+ unsigned lcn, logical_cluster_ofs, cluster_type;
+ u32 ofs_rem;
+ struct page *mpage = *mpage_ret;
+ void *kaddr;
+ bool initial;
+ const unsigned int clusterbits = EROFS_SB(inode->i_sb)->clusterbits;
+ const unsigned int clustersize = 1 << clusterbits;
+ int err = 0;
+
+ /* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc...
*/ + initial = !map->m_llen; + + /* when trying to read beyond EOF, leave it unmapped */ + if (unlikely(map->m_la >= inode->i_size)) { + BUG_ON(!initial); + map->m_llen = map->m_la + 1 - inode->i_size; + map->m_la = inode->i_size - 1; + map->m_flags = 0; + goto out; + } + + debugln("%s, m_la %llu m_llen %llu --- start", __func__, + map->m_la, map->m_llen); + + ofs = map->m_la + map->m_llen; + + /* clustersize should be power of two */ + lcn = ofs >> clusterbits; + ofs_rem = ofs & (clustersize - 1); + + e_blkaddr = vle_extent_blkaddr(inode, lcn); + + if (mpage == NULL || mpage->index != e_blkaddr) { + if (mpage != NULL) + put_page(mpage); + + mpage = erofs_get_meta_page(inode->i_sb, e_blkaddr, false); + *mpage_ret = mpage; + } else { + lock_page(mpage); + DBG_BUGON(!PageUptodate(mpage)); + } + + kaddr = kmap_atomic(mpage); + di = kaddr + vle_extent_blkoff(inode, lcn); + + debugln("%s, lcn %u e_blkaddr %u e_blkoff %u", __func__, lcn, + e_blkaddr, vle_extent_blkoff(inode, lcn)); + + logical_cluster_ofs = vle_compressed_index_clusterofs(clustersize, di); + if (!initial) { + /* [walking mode] 'map' has been already initialized */ + map->m_llen += logical_cluster_ofs; + goto unmap_out; + } + + /* by default, compressed */ + map->m_flags |= EROFS_MAP_ZIPPED; + + end = (u64)(lcn + 1) * clustersize; + + cluster_type = vle_cluster_type(di); + + switch (cluster_type) { + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + if (ofs_rem >= logical_cluster_ofs) + map->m_flags ^= EROFS_MAP_ZIPPED; + /* fallthrough */ + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + if (ofs_rem == logical_cluster_ofs) { + pcn = le32_to_cpu(di->di_u.blkaddr); + goto exact_hitted; + } + + if (ofs_rem > logical_cluster_ofs) { + ofs = lcn * clustersize | logical_cluster_ofs; + pcn = le32_to_cpu(di->di_u.blkaddr); + break; + } + + /* logical cluster number should be >= 1 */ + if (unlikely(!lcn)) { + errln("invalid logical cluster 0 at nid %llu", + EROFS_V(inode)->nid); + err = -EIO; + goto unmap_out; + } + end = (lcn-- * clustersize) | logical_cluster_ofs; + /* fallthrough */ + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + /* get the correspoinding first chunk */ + ofs = vle_get_logical_extent_head(inode, mpage_ret, + &kaddr, lcn, &pcn, &map->m_flags); + mpage = *mpage_ret; + break; + default: + errln("unknown cluster type %u at offset %llu of nid %llu", + cluster_type, ofs, EROFS_V(inode)->nid); + err = -EIO; + goto unmap_out; + } + + map->m_la = ofs; +exact_hitted: + map->m_llen = end - ofs; + map->m_plen = clustersize; + map->m_pa = blknr_to_addr(pcn); + map->m_flags |= EROFS_MAP_MAPPED; +unmap_out: + kunmap_atomic(kaddr); + unlock_page(mpage); +out: + debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", + __func__, map->m_la, map->m_pa, + map->m_llen, map->m_plen, map->m_flags); + + /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ + DBG_BUGON(err < 0); + return err; +} + diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h new file mode 100644 index 000000000000..393998500865 --- /dev/null +++ b/drivers/staging/erofs/unzip_vle.h @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * linux/drivers/staging/erofs/unzip_vle.h + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. 
+ */ +#ifndef __EROFS_FS_UNZIP_VLE_H +#define __EROFS_FS_UNZIP_VLE_H + +#include "internal.h" +#include "unzip_pagevec.h" + +/* + * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) - + * used for temporary allocated pages (via erofs_allocpage), + * in order to seperate those from NULL mapping (eg. truncated pages) + */ +#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D) + +#define z_erofs_is_stagingpage(page) \ + ((page)->mapping == Z_EROFS_MAPPING_STAGING) + +static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool, + struct page *page) +{ + if (z_erofs_is_stagingpage(page)) { + list_add(&page->lru, page_pool); + return true; + } + return false; +} + +/* + * Structure fields follow one of the following exclusion rules. + * + * I: Modifiable by initialization/destruction paths and read-only + * for everyone else. + * + */ + +#define Z_EROFS_VLE_INLINE_PAGEVECS 3 + +struct z_erofs_vle_work { + /* struct z_erofs_vle_work *left, *right; */ + +#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF + struct list_head list; + + atomic_t refcount; +#endif + struct mutex lock; + + /* I: decompression offset in page */ + unsigned short pageofs; + unsigned short nr_pages; + + /* L: queued pages in pagevec[] */ + unsigned vcnt; + + union { + /* L: pagevec */ + erofs_vtptr_t pagevec[Z_EROFS_VLE_INLINE_PAGEVECS]; + struct rcu_head rcu; + }; +}; + +#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0 +#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1 +#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1 + +typedef struct z_erofs_vle_workgroup *z_erofs_vle_owned_workgrp_t; + +struct z_erofs_vle_workgroup { + struct erofs_workgroup obj; + struct z_erofs_vle_work work; + + /* next owned workgroup */ + z_erofs_vle_owned_workgrp_t next; + + /* compressed pages (including multi-usage pages) */ + struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES]; + unsigned int llen, flags; +}; + +/* let's avoid the valid 32-bit kernel addresses */ + +/* the chained workgroup has't submitted io (still open) */ +#define Z_EROFS_VLE_WORKGRP_TAIL ((void *)0x5F0ECAFE) +/* the chained workgroup has already submitted io */ +#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD) + +#define Z_EROFS_VLE_WORKGRP_NIL (NULL) + +#define z_erofs_vle_workgrp_fmt(grp) \ + ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK) + +static inline void z_erofs_vle_set_workgrp_fmt( + struct z_erofs_vle_workgroup *grp, + unsigned int fmt) +{ + grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK); +} + +#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF +#error multiref decompression is unimplemented yet +#else + +#define z_erofs_vle_grab_primary_work(grp) (&(grp)->work) +#define z_erofs_vle_grab_work(grp, pageofs) (&(grp)->work) +#define z_erofs_vle_work_workgroup(wrk, primary) \ + ((primary) ? container_of(wrk, \ + struct z_erofs_vle_workgroup, work) : \ + ({ BUG(); (void *)NULL; })) + +#endif + +#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_vle_workgroup) + +struct z_erofs_vle_unzip_io { + atomic_t pending_bios; + z_erofs_vle_owned_workgrp_t head; + + union { + wait_queue_head_t wait; + struct work_struct work; + } u; +}; + +struct z_erofs_vle_unzip_io_sb { + struct z_erofs_vle_unzip_io io; + struct super_block *sb; +}; + +#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 +#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) +#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) + +/* + * waiters (aka. 
ongoing_packs): # to unlock the page + * sub-index: 0 - for partial page, >= 1 full page sub-index + */ +typedef atomic_t z_erofs_onlinepage_t; + +/* type punning */ +union z_erofs_onlinepage_converter { + z_erofs_onlinepage_t *o; + unsigned long *v; +}; + +static inline unsigned z_erofs_onlinepage_index(struct page *page) +{ + union z_erofs_onlinepage_converter u; + + BUG_ON(!PagePrivate(page)); + u.v = &page_private(page); + + return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; +} + +static inline void z_erofs_onlinepage_init(struct page *page) +{ + union { + z_erofs_onlinepage_t o; + unsigned long v; + /* keep from being unlocked in advance */ + } u = { .o = ATOMIC_INIT(1) }; + + set_page_private(page, u.v); + smp_wmb(); + SetPagePrivate(page); +} + +static inline void z_erofs_onlinepage_fixup(struct page *page, + uintptr_t index, bool down) +{ + unsigned long *p, o, v, id; +repeat: + p = &page_private(page); + o = READ_ONCE(*p); + + id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (id) { + if (!index) + return; + + BUG_ON(id != index); + } + + v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned)down); + if (cmpxchg(p, o, v) != o) + goto repeat; +} + +static inline void z_erofs_onlinepage_endio(struct page *page) +{ + union z_erofs_onlinepage_converter u; + unsigned v; + + BUG_ON(!PagePrivate(page)); + u.v = &page_private(page); + + v = atomic_dec_return(u.o); + if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) { + ClearPagePrivate(page); + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + + debugln("%s, page %p value %x", __func__, page, atomic_read(u.o)); +} + +#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \ + min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U) +#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048 + +/* unzip_vle_lz4.c */ +extern int z_erofs_vle_plain_copy(struct page **compressed_pages, + unsigned clusterpages, struct page **pages, + unsigned nr_pages, unsigned short pageofs); + +extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, + unsigned clusterpages, struct page **pages, + unsigned outlen, unsigned short pageofs, + void (*endio)(struct page *)); + +extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages, + unsigned clusterpages, void *vaddr, unsigned llen, + unsigned short pageofs, bool overlapped); + +#endif + diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c new file mode 100644 index 000000000000..f5b665f15be5 --- /dev/null +++ b/drivers/staging/erofs/unzip_vle_lz4.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/unzip_vle_lz4.c + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. 
+ */ +#include "unzip_vle.h" + +#if Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_VLE_INLINE_PAGEVECS +#define EROFS_PERCPU_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES +#else +#define EROFS_PERCPU_NR_PAGES Z_EROFS_VLE_INLINE_PAGEVECS +#endif + +static struct { + char data[PAGE_SIZE * EROFS_PERCPU_NR_PAGES]; +} erofs_pcpubuf[NR_CPUS]; + +int z_erofs_vle_plain_copy(struct page **compressed_pages, + unsigned clusterpages, + struct page **pages, + unsigned nr_pages, + unsigned short pageofs) +{ + unsigned i, j; + void *src = NULL; + const unsigned righthalf = PAGE_SIZE - pageofs; + char *percpu_data; + bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 }; + + preempt_disable(); + percpu_data = erofs_pcpubuf[smp_processor_id()].data; + + j = 0; + for (i = 0; i < nr_pages; j = i++) { + struct page *page = pages[i]; + void *dst; + + if (page == NULL) { + if (src != NULL) { + if (!mirrored[j]) + kunmap_atomic(src); + src = NULL; + } + continue; + } + + dst = kmap_atomic(page); + + for (; j < clusterpages; ++j) { + if (compressed_pages[j] != page) + continue; + + BUG_ON(mirrored[j]); + memcpy(percpu_data + j * PAGE_SIZE, dst, PAGE_SIZE); + mirrored[j] = true; + break; + } + + if (i) { + if (src == NULL) + src = mirrored[i-1] ? + percpu_data + (i-1) * PAGE_SIZE : + kmap_atomic(compressed_pages[i-1]); + + memcpy(dst, src + righthalf, pageofs); + + if (!mirrored[i-1]) + kunmap_atomic(src); + + if (unlikely(i >= clusterpages)) { + kunmap_atomic(dst); + break; + } + } + + if (!righthalf) + src = NULL; + else { + src = mirrored[i] ? percpu_data + i * PAGE_SIZE : + kmap_atomic(compressed_pages[i]); + + memcpy(dst + pageofs, src, righthalf); + } + + kunmap_atomic(dst); + } + + if (src != NULL && !mirrored[j]) + kunmap_atomic(src); + + preempt_enable(); + return 0; +} + +extern int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen); + +int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, + unsigned clusterpages, + struct page **pages, + unsigned outlen, + unsigned short pageofs, + void (*endio)(struct page *)) +{ + void *vin, *vout; + unsigned nr_pages, i, j; + int ret; + + if (outlen + pageofs > EROFS_PERCPU_NR_PAGES * PAGE_SIZE) + return -ENOTSUPP; + + nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE); + + if (clusterpages == 1) + vin = kmap_atomic(compressed_pages[0]); + else + vin = erofs_vmap(compressed_pages, clusterpages); + + preempt_disable(); + vout = erofs_pcpubuf[smp_processor_id()].data; + + ret = z_erofs_unzip_lz4(vin, vout + pageofs, + clusterpages * PAGE_SIZE, outlen); + + if (ret >= 0) { + outlen = ret; + ret = 0; + } + + for (i = 0; i < nr_pages; ++i) { + j = min((unsigned)PAGE_SIZE - pageofs, outlen); + + if (pages[i] != NULL) { + if (ret < 0) + SetPageError(pages[i]); + else if (clusterpages == 1 && pages[i] == compressed_pages[0]) + memcpy(vin + pageofs, vout + pageofs, j); + else { + void *dst = kmap_atomic(pages[i]); + + memcpy(dst + pageofs, vout + pageofs, j); + kunmap_atomic(dst); + } + endio(pages[i]); + } + vout += PAGE_SIZE; + outlen -= j; + pageofs = 0; + } + preempt_enable(); + + if (clusterpages == 1) + kunmap_atomic(vin); + else + erofs_vunmap(vin, clusterpages); + + return ret; +} + +int z_erofs_vle_unzip_vmap(struct page **compressed_pages, + unsigned clusterpages, + void *vout, + unsigned llen, + unsigned short pageofs, + bool overlapped) +{ + void *vin; + unsigned i; + int ret; + + if (overlapped) { + preempt_disable(); + vin = erofs_pcpubuf[smp_processor_id()].data; + + for (i = 0; i < clusterpages; ++i) { + void *t = kmap_atomic(compressed_pages[i]); + + 
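+ /*
+ * The compressed input is staged into the per-CPU buffer here
+ * because, in the overlapped case, some compressed pages double as
+ * decompressed output pages; decompressing straight from them could
+ * overwrite input bytes that have not been consumed yet.
+ */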
memcpy(vin + PAGE_SIZE *i, t, PAGE_SIZE); + kunmap_atomic(t); + } + } else if (clusterpages == 1) + vin = kmap_atomic(compressed_pages[0]); + else { + vin = erofs_vmap(compressed_pages, clusterpages); + } + + ret = z_erofs_unzip_lz4(vin, vout + pageofs, + clusterpages * PAGE_SIZE, llen); + if (ret > 0) + ret = 0; + + if (!overlapped) { + if (clusterpages == 1) + kunmap_atomic(vin); + else { + erofs_vunmap(vin, clusterpages); + } + } else + preempt_enable(); + + return ret; +} + diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c new file mode 100644 index 000000000000..595cf90af9bb --- /dev/null +++ b/drivers/staging/erofs/utils.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/utils.c + * + * Copyright (C) 2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ + +#include "internal.h" +#include <linux/pagevec.h> + +struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) +{ + struct page *page; + + if (!list_empty(pool)) { + page = lru_to_page(pool); + list_del(&page->lru); + } else { + page = alloc_pages(gfp | __GFP_NOFAIL, 0); + + BUG_ON(page == NULL); + BUG_ON(page->mapping != NULL); + } + return page; +} + +/* global shrink count (for all mounted EROFS instances) */ +static atomic_long_t erofs_global_shrink_cnt; + +#ifdef CONFIG_EROFS_FS_ZIP + +/* radix_tree and the future XArray both don't use tagptr_t yet */ +struct erofs_workgroup *erofs_find_workgroup( + struct super_block *sb, pgoff_t index, bool *tag) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_workgroup *grp; + int oldcount; + +repeat: + rcu_read_lock(); + grp = radix_tree_lookup(&sbi->workstn_tree, index); + if (grp != NULL) { + *tag = radix_tree_exceptional_entry(grp); + grp = (void *)((unsigned long)grp & + ~RADIX_TREE_EXCEPTIONAL_ENTRY); + + if (erofs_workgroup_get(grp, &oldcount)) { + /* prefer to relax rcu read side */ + rcu_read_unlock(); + goto repeat; + } + + /* decrease refcount added by erofs_workgroup_put */ + if (unlikely(oldcount == 1)) + atomic_long_dec(&erofs_global_shrink_cnt); + BUG_ON(index != grp->index); + } + rcu_read_unlock(); + return grp; +} + +int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp, + bool tag) +{ + struct erofs_sb_info *sbi; + int err; + + /* grp->refcount should not < 1 */ + BUG_ON(!atomic_read(&grp->refcount)); + + err = radix_tree_preload(GFP_NOFS); + if (err) + return err; + + sbi = EROFS_SB(sb); + erofs_workstn_lock(sbi); + + if (tag) + grp = (void *)((unsigned long)grp | + 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT); + + err = radix_tree_insert(&sbi->workstn_tree, + grp->index, grp); + + if (!err) { + __erofs_workgroup_get(grp); + } + + erofs_workstn_unlock(sbi); + radix_tree_preload_end(); + return err; +} + +extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); + +int erofs_workgroup_put(struct erofs_workgroup *grp) +{ + int count = atomic_dec_return(&grp->refcount); + + if (count == 1) + atomic_long_inc(&erofs_global_shrink_cnt); + else if (!count) { + atomic_long_dec(&erofs_global_shrink_cnt); + erofs_workgroup_free_rcu(grp); + } + return count; +} + +unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, + unsigned long nr_shrink, + bool cleanup) +{ + pgoff_t first_index = 0; + void *batch[PAGEVEC_SIZE]; + unsigned 
freed = 0; + + int i, found; +repeat: + erofs_workstn_lock(sbi); + + found = radix_tree_gang_lookup(&sbi->workstn_tree, + batch, first_index, PAGEVEC_SIZE); + + for (i = 0; i < found; ++i) { + int cnt; + struct erofs_workgroup *grp = (void *) + ((unsigned long)batch[i] & + ~RADIX_TREE_EXCEPTIONAL_ENTRY); + + first_index = grp->index + 1; + + cnt = atomic_read(&grp->refcount); + BUG_ON(cnt <= 0); + + if (cleanup) + BUG_ON(cnt != 1); + +#ifndef EROFS_FS_HAS_MANAGED_CACHE + else if (cnt > 1) +#else + if (!erofs_workgroup_try_to_freeze(grp, 1)) +#endif + continue; + + if (radix_tree_delete(&sbi->workstn_tree, + grp->index) != grp) { +#ifdef EROFS_FS_HAS_MANAGED_CACHE +skip: + erofs_workgroup_unfreeze(grp, 1); +#endif + continue; + } + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (erofs_try_to_free_all_cached_pages(sbi, grp)) + goto skip; + + erofs_workgroup_unfreeze(grp, 1); +#endif + /* (rarely) grabbed again when freeing */ + erofs_workgroup_put(grp); + + ++freed; + if (unlikely(!--nr_shrink)) + break; + } + erofs_workstn_unlock(sbi); + + if (i && nr_shrink) + goto repeat; + return freed; +} + +#endif + +/* protected by 'erofs_sb_list_lock' */ +static unsigned int shrinker_run_no; + +/* protects the mounted 'erofs_sb_list' */ +static DEFINE_SPINLOCK(erofs_sb_list_lock); +static LIST_HEAD(erofs_sb_list); + +void erofs_register_super(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + mutex_init(&sbi->umount_mutex); + + spin_lock(&erofs_sb_list_lock); + list_add(&sbi->list, &erofs_sb_list); + spin_unlock(&erofs_sb_list_lock); +} + +void erofs_unregister_super(struct super_block *sb) +{ + spin_lock(&erofs_sb_list_lock); + list_del(&EROFS_SB(sb)->list); + spin_unlock(&erofs_sb_list_lock); +} + +unsigned long erofs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return atomic_long_read(&erofs_global_shrink_cnt); +} + +unsigned long erofs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct erofs_sb_info *sbi; + struct list_head *p; + + unsigned long nr = sc->nr_to_scan; + unsigned int run_no; + unsigned long freed = 0; + + spin_lock(&erofs_sb_list_lock); + do + run_no = ++shrinker_run_no; + while (run_no == 0); + + /* Iterate over all mounted superblocks and try to shrink them */ + p = erofs_sb_list.next; + while (p != &erofs_sb_list) { + sbi = list_entry(p, struct erofs_sb_info, list); + + /* + * We move the ones we do to the end of the list, so we stop + * when we see one we have already done. + */ + if (sbi->shrinker_run_no == run_no) + break; + + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + + spin_unlock(&erofs_sb_list_lock); + sbi->shrinker_run_no = run_no; + +#ifdef CONFIG_EROFS_FS_ZIP + freed += erofs_shrink_workstation(sbi, nr, false); +#endif + + spin_lock(&erofs_sb_list_lock); + /* Get the next list element before we move this one */ + p = p->next; + + /* + * Move this one to the end of the list to provide some + * fairness. + */ + list_move_tail(&sbi->list, &erofs_sb_list); + mutex_unlock(&sbi->umount_mutex); + + if (freed >= nr) + break; + } + spin_unlock(&erofs_sb_list_lock); + return freed; +} + diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c new file mode 100644 index 000000000000..0e9cfeccdf99 --- /dev/null +++ b/drivers/staging/erofs/xattr.c @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/staging/erofs/xattr.c + * + * Copyright (C) 2017-2018 HUAWEI, Inc. 
+ * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#include <linux/security.h> +#include "xattr.h" + +struct xattr_iter { + struct super_block *sb; + struct page *page; + void *kaddr; + + erofs_blk_t blkaddr; + unsigned ofs; +}; + +static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) +{ + /* only init_inode_xattrs use non-atomic once */ + if (unlikely(!atomic)) + kunmap(it->page); + else + kunmap_atomic(it->kaddr); + unlock_page(it->page); + put_page(it->page); +} + +static void init_inode_xattrs(struct inode *inode) +{ + struct xattr_iter it; + unsigned i; + struct erofs_xattr_ibody_header *ih; + struct erofs_sb_info *sbi; + struct erofs_vnode *vi; + bool atomic_map; + + if (likely(inode_has_inited_xattr(inode))) + return; + + vi = EROFS_V(inode); + BUG_ON(!vi->xattr_isize); + + sbi = EROFS_I_SB(inode); + it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); + it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); + + it.page = erofs_get_inline_page(inode, it.blkaddr); + BUG_ON(IS_ERR(it.page)); + + /* read in shared xattr array (non-atomic, see kmalloc below) */ + it.kaddr = kmap(it.page); + atomic_map = false; + + ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); + + vi->xattr_shared_count = ih->h_shared_count; + vi->xattr_shared_xattrs = (unsigned *)kmalloc_array( + vi->xattr_shared_count, sizeof(unsigned), + GFP_KERNEL | __GFP_NOFAIL); + + /* let's skip ibody header */ + it.ofs += sizeof(struct erofs_xattr_ibody_header); + + for (i = 0; i < vi->xattr_shared_count; ++i) { + if (unlikely(it.ofs >= EROFS_BLKSIZ)) { + /* cannot be unaligned */ + BUG_ON(it.ofs != EROFS_BLKSIZ); + xattr_iter_end(&it, atomic_map); + + it.page = erofs_get_meta_page(inode->i_sb, + ++it.blkaddr, S_ISDIR(inode->i_mode)); + BUG_ON(IS_ERR(it.page)); + + it.kaddr = kmap_atomic(it.page); + atomic_map = true; + it.ofs = 0; + } + vi->xattr_shared_xattrs[i] = + le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); + it.ofs += sizeof(__le32); + } + xattr_iter_end(&it, atomic_map); + + inode_set_inited_xattr(inode); +} + +struct xattr_iter_handlers { + int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *); + int (*name)(struct xattr_iter *, unsigned, char *, unsigned); + int (*alloc_buffer)(struct xattr_iter *, unsigned); + void (*value)(struct xattr_iter *, unsigned, char *, unsigned); +}; + +static void xattr_iter_fixup(struct xattr_iter *it) +{ + if (unlikely(it->ofs >= EROFS_BLKSIZ)) { + xattr_iter_end(it, true); + + it->blkaddr += erofs_blknr(it->ofs); + it->page = erofs_get_meta_page(it->sb, it->blkaddr, false); + BUG_ON(IS_ERR(it->page)); + + it->kaddr = kmap_atomic(it->page); + it->ofs = erofs_blkoff(it->ofs); + } +} + +static int inline_xattr_iter_begin(struct xattr_iter *it, + struct inode *inode) +{ + struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); + unsigned xattr_header_sz, inline_xattr_ofs; + + xattr_header_sz = inlinexattr_header_size(inode); + if (unlikely(xattr_header_sz >= vi->xattr_isize)) { + BUG_ON(xattr_header_sz > vi->xattr_isize); + return -ENOATTR; + } + + inline_xattr_ofs = vi->inode_isize + xattr_header_sz; + + it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); + it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); + + it->page = 
erofs_get_inline_page(inode, it->blkaddr); + BUG_ON(IS_ERR(it->page)); + it->kaddr = kmap_atomic(it->page); + + return vi->xattr_isize - xattr_header_sz; +} + +static int xattr_foreach(struct xattr_iter *it, + struct xattr_iter_handlers *op, unsigned *tlimit) +{ + struct erofs_xattr_entry entry; + unsigned value_sz, processed, slice; + int err; + + /* 0. fixup blkaddr, ofs, ipage */ + xattr_iter_fixup(it); + + /* + * 1. read xattr entry to the memory, + * since we do EROFS_XATTR_ALIGN + * therefore entry should be in the page + */ + entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs); + if (tlimit != NULL) { + unsigned entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry); + + BUG_ON(*tlimit < entry_sz); + *tlimit -= entry_sz; + } + + it->ofs += sizeof(struct erofs_xattr_entry); + value_sz = le16_to_cpu(entry.e_value_size); + + /* handle entry */ + err = op->entry(it, &entry); + if (err) { + it->ofs += entry.e_name_len + value_sz; + goto out; + } + + /* 2. handle xattr name (ofs will finally be at the end of name) */ + processed = 0; + + while (processed < entry.e_name_len) { + if (it->ofs >= EROFS_BLKSIZ) { + BUG_ON(it->ofs > EROFS_BLKSIZ); + + xattr_iter_fixup(it); + it->ofs = 0; + } + + slice = min_t(unsigned, PAGE_SIZE - it->ofs, + entry.e_name_len - processed); + + /* handle name */ + err = op->name(it, processed, it->kaddr + it->ofs, slice); + if (err) { + it->ofs += entry.e_name_len - processed + value_sz; + goto out; + } + + it->ofs += slice; + processed += slice; + } + + /* 3. handle xattr value */ + processed = 0; + + if (op->alloc_buffer != NULL) { + err = op->alloc_buffer(it, value_sz); + if (err) { + it->ofs += value_sz; + goto out; + } + } + + while (processed < value_sz) { + if (it->ofs >= EROFS_BLKSIZ) { + BUG_ON(it->ofs > EROFS_BLKSIZ); + xattr_iter_fixup(it); + it->ofs = 0; + } + + slice = min_t(unsigned, PAGE_SIZE - it->ofs, + value_sz - processed); + op->value(it, processed, it->kaddr + it->ofs, slice); + it->ofs += slice; + processed += slice; + } + +out: + /* we assume that ofs is aligned with 4 bytes */ + it->ofs = EROFS_XATTR_ALIGN(it->ofs); + return err; +} + +struct getxattr_iter { + struct xattr_iter it; + + char *buffer; + int buffer_size, index; + struct qstr name; +}; + +static int xattr_entrymatch(struct xattr_iter *_it, + struct erofs_xattr_entry *entry) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + return (it->index != entry->e_name_index || + it->name.len != entry->e_name_len) ? -ENOATTR : 0; +} + +static int xattr_namematch(struct xattr_iter *_it, + unsigned processed, char *buf, unsigned len) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0; +} + +static int xattr_checkbuffer(struct xattr_iter *_it, + unsigned value_sz) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + int err = it->buffer_size < value_sz ? -ERANGE : 0; + + it->buffer_size = value_sz; + return it->buffer == NULL ? 
1 : err; +} + +static void xattr_copyvalue(struct xattr_iter *_it, + unsigned processed, char *buf, unsigned len) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + memcpy(it->buffer + processed, buf, len); +} + +static struct xattr_iter_handlers find_xattr_handlers = { + .entry = xattr_entrymatch, + .name = xattr_namematch, + .alloc_buffer = xattr_checkbuffer, + .value = xattr_copyvalue +}; + +static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) +{ + int ret; + unsigned remaining; + + ret = inline_xattr_iter_begin(&it->it, inode); + if (ret < 0) + return ret; + + remaining = ret; + while (remaining) { + ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining); + if (ret >= 0) + break; + } + xattr_iter_end(&it->it, true); + + return ret < 0 ? ret : it->buffer_size; +} + +static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) +{ + struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); + unsigned i; + int ret = -ENOATTR; + + for (i = 0; i < vi->xattr_shared_count; ++i) { + erofs_blk_t blkaddr = + xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); + + it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); + + if (!i || blkaddr != it->it.blkaddr) { + if (i) + xattr_iter_end(&it->it, true); + + it->it.page = erofs_get_meta_page(inode->i_sb, + blkaddr, false); + BUG_ON(IS_ERR(it->it.page)); + it->it.kaddr = kmap_atomic(it->it.page); + it->it.blkaddr = blkaddr; + } + + ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); + if (ret >= 0) + break; + } + if (vi->xattr_shared_count) + xattr_iter_end(&it->it, true); + + return ret < 0 ? ret : it->buffer_size; +} + +static bool erofs_xattr_user_list(struct dentry *dentry) +{ + return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER); +} + +static bool erofs_xattr_trusted_list(struct dentry *dentry) +{ + return capable(CAP_SYS_ADMIN); +} + +int erofs_getxattr(struct inode *inode, int index, + const char *name, + void *buffer, size_t buffer_size) +{ + int ret; + struct getxattr_iter it; + + if (unlikely(name == NULL)) + return -EINVAL; + + init_inode_xattrs(inode); + + it.index = index; + + it.name.len = strlen(name); + if (it.name.len > EROFS_NAME_LEN) + return -ERANGE; + it.name.name = name; + + it.buffer = buffer; + it.buffer_size = buffer_size; + + it.it.sb = inode->i_sb; + ret = inline_getxattr(inode, &it); + if (ret == -ENOATTR) + ret = shared_getxattr(inode, &it); + return ret; +} + +static int erofs_xattr_generic_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + + switch (handler->flags) { + case EROFS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case EROFS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + case EROFS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + + if (!vi->xattr_isize) + return -ENOATTR; + + return erofs_getxattr(inode, handler->flags, name, buffer, size); +} + +const struct xattr_handler erofs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = EROFS_XATTR_INDEX_USER, + .list = erofs_xattr_user_list, + .get = erofs_xattr_generic_get, +}; + +const struct xattr_handler erofs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = EROFS_XATTR_INDEX_TRUSTED, + .list = erofs_xattr_trusted_list, 
+ .get = erofs_xattr_generic_get, +}; + +#ifdef CONFIG_EROFS_FS_SECURITY +const struct xattr_handler __maybe_unused erofs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = EROFS_XATTR_INDEX_SECURITY, + .get = erofs_xattr_generic_get, +}; +#endif + +const struct xattr_handler *erofs_xattr_handlers[] = { + &erofs_xattr_user_handler, +#ifdef CONFIG_EROFS_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif + &erofs_xattr_trusted_handler, +#ifdef CONFIG_EROFS_FS_SECURITY + &erofs_xattr_security_handler, +#endif + NULL, +}; + +struct listxattr_iter { + struct xattr_iter it; + + struct dentry *dentry; + char *buffer; + int buffer_size, buffer_ofs; +}; + +static int xattr_entrylist(struct xattr_iter *_it, + struct erofs_xattr_entry *entry) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + unsigned prefix_len; + const char *prefix; + + const struct xattr_handler *h = + erofs_xattr_handler(entry->e_name_index); + + if (h == NULL || (h->list != NULL && !h->list(it->dentry))) + return 1; + + /* Note that at least one of 'prefix' and 'name' should be non-NULL */ + prefix = h->prefix != NULL ? h->prefix : h->name; + prefix_len = strlen(prefix); + + if (it->buffer == NULL) { + it->buffer_ofs += prefix_len + entry->e_name_len + 1; + return 1; + } + + if (it->buffer_ofs + prefix_len + + entry->e_name_len + 1 > it->buffer_size) + return -ERANGE; + + memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len); + it->buffer_ofs += prefix_len; + return 0; +} + +static int xattr_namelist(struct xattr_iter *_it, + unsigned processed, char *buf, unsigned len) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + + memcpy(it->buffer + it->buffer_ofs, buf, len); + it->buffer_ofs += len; + return 0; +} + +static int xattr_skipvalue(struct xattr_iter *_it, + unsigned value_sz) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + + it->buffer[it->buffer_ofs++] = '\0'; + return 1; +} + +static struct xattr_iter_handlers list_xattr_handlers = { + .entry = xattr_entrylist, + .name = xattr_namelist, + .alloc_buffer = xattr_skipvalue, + .value = NULL +}; + +static int inline_listxattr(struct listxattr_iter *it) +{ + int ret; + unsigned remaining; + + ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry)); + if (ret < 0) + return ret; + + remaining = ret; + while (remaining) { + ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining); + if (ret < 0) + break; + } + xattr_iter_end(&it->it, true); + return ret < 0 ? ret : it->buffer_ofs; +} + +static int shared_listxattr(struct listxattr_iter *it) +{ + struct inode *const inode = d_inode(it->dentry); + struct erofs_vnode *const vi = EROFS_V(inode); + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + unsigned i; + int ret = 0; + + for (i = 0; i < vi->xattr_shared_count; ++i) { + erofs_blk_t blkaddr = + xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); + + it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); + if (!i || blkaddr != it->it.blkaddr) { + if (i) + xattr_iter_end(&it->it, true); + + it->it.page = erofs_get_meta_page(inode->i_sb, + blkaddr, false); + BUG_ON(IS_ERR(it->it.page)); + it->it.kaddr = kmap_atomic(it->it.page); + it->it.blkaddr = blkaddr; + } + + ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); + if (ret < 0) + break; + } + if (vi->xattr_shared_count) + xattr_iter_end(&it->it, true); + + return ret < 0 ? 
ret : it->buffer_ofs; +} + +ssize_t erofs_listxattr(struct dentry *dentry, + char *buffer, size_t buffer_size) +{ + int ret; + struct listxattr_iter it; + + init_inode_xattrs(d_inode(dentry)); + + it.dentry = dentry; + it.buffer = buffer; + it.buffer_size = buffer_size; + it.buffer_ofs = 0; + + it.it.sb = dentry->d_sb; + + ret = inline_listxattr(&it); + if (ret < 0 && ret != -ENOATTR) + return ret; + return shared_listxattr(&it); +} + diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h new file mode 100644 index 000000000000..0c7379282fc5 --- /dev/null +++ b/drivers/staging/erofs/xattr.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * linux/drivers/staging/erofs/xattr.h + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * http://www.huawei.com/ + * Created by Gao Xiang <gaoxiang25@huawei.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ +#ifndef __EROFS_XATTR_H +#define __EROFS_XATTR_H + +#include "internal.h" +#include <linux/posix_acl_xattr.h> +#include <linux/xattr.h> + +/* Attribute not found */ +#define ENOATTR ENODATA + +static inline unsigned inlinexattr_header_size(struct inode *inode) +{ + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(u32) * EROFS_V(inode)->xattr_shared_count; +} + +static inline erofs_blk_t +xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id) +{ +#ifdef CONFIG_EROFS_FS_XATTR + return sbi->xattr_blkaddr + + xattr_id * sizeof(__u32) / EROFS_BLKSIZ; +#else + return 0; +#endif +} + +static inline unsigned +xattrblock_offset(struct erofs_sb_info *sbi, unsigned xattr_id) +{ + return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ; +} + +extern const struct xattr_handler erofs_xattr_user_handler; +extern const struct xattr_handler erofs_xattr_trusted_handler; +#ifdef CONFIG_EROFS_FS_SECURITY +extern const struct xattr_handler erofs_xattr_security_handler; +#endif + +static inline const struct xattr_handler *erofs_xattr_handler(unsigned index) +{ +static const struct xattr_handler *xattr_handler_map[] = { + [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, +#ifdef CONFIG_EROFS_FS_POSIX_ACL + [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, + [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = + &posix_acl_default_xattr_handler, +#endif + [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, +#ifdef CONFIG_EROFS_FS_SECURITY + [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, +#endif +}; + return index && index < ARRAY_SIZE(xattr_handler_map) ? + xattr_handler_map[index] : NULL; +} + +#ifdef CONFIG_EROFS_FS_XATTR + +extern const struct inode_operations erofs_generic_xattr_iops; +extern const struct inode_operations erofs_dir_xattr_iops; + +int erofs_getxattr(struct inode *, int, const char *, void *, size_t); +ssize_t erofs_listxattr(struct dentry *, char *, size_t); +#else +static int __maybe_unused erofs_getxattr(struct inode *inode, int index, + const char *name, + void *buffer, size_t buffer_size) +{ + return -ENOTSUPP; +} + +static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry, + char *buffer, size_t buffer_size) +{ + return -ENOTSUPP; +} +#endif + +#endif + |