From b0def88d807f6db6076aab2aeb11f542036e81a6 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 9 Apr 2020 18:58:34 +0300
Subject: ovl: resolve more conflicting mount options

Similar to the way that a conflict between metacopy=on,redirect_dir=off is
resolved, also resolve conflicts between nfs_export=on,index=off and
nfs_export=on,metacopy=on.

An explicit mount option wins over a default config value.  Both explicit
mount options result in an error.

Without this change the xfstests group overlay/exportfs are skipped if
metacopy is enabled by default.

Reported-by: Chengguang Xu <cgxu519@mykernel.net>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/overlayfs.rst |  7 +++--
 fs/overlayfs/super.c                    | 48 +++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index c9d2bf96b02d..660dbaf0b9b8 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -365,8 +365,8 @@ pointed by REDIRECT. This should not be possible on local system as setting
 "trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
 for untrusted layers like from a pen drive.
 
-Note: redirect_dir={off|nofollow|follow[*]} conflicts with metacopy=on, and
-results in an error.
+Note: redirect_dir={off|nofollow|follow[*]} and nfs_export=on mount options
+conflict with metacopy=on, and will result in an error.
 
 [*] redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
 given.
@@ -560,6 +560,9 @@ When the NFS export feature is enabled, all directory index entries are
 verified on mount time to check that upper file handles are not stale.
 This verification may cause significant overhead in some cases.
 
+Note: the mount options index=off,nfs_export=on are conflicting and will
+result in an error.
+
 
 Testsuite
 ---------
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 732ad5495c92..fbd6207acdbf 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -470,6 +470,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 	char *p;
 	int err;
 	bool metacopy_opt = false, redirect_opt = false;
+	bool nfs_export_opt = false, index_opt = false;
 
 	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 	if (!config->redirect_mode)
@@ -519,18 +520,22 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
 		case OPT_INDEX_ON:
 			config->index = true;
+			index_opt = true;
 			break;
 
 		case OPT_INDEX_OFF:
 			config->index = false;
+			index_opt = true;
 			break;
 
 		case OPT_NFS_EXPORT_ON:
 			config->nfs_export = true;
+			nfs_export_opt = true;
 			break;
 
 		case OPT_NFS_EXPORT_OFF:
 			config->nfs_export = false;
+			nfs_export_opt = true;
 			break;
 
 		case OPT_XINO_ON:
@@ -552,6 +557,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
 		case OPT_METACOPY_OFF:
 			config->metacopy = false;
+			metacopy_opt = true;
 			break;
 
 		default:
@@ -601,6 +607,48 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 		}
 	}
 
+	/* Resolve nfs_export -> index dependency */
+	if (config->nfs_export && !config->index) {
+		if (nfs_export_opt && index_opt) {
+			pr_err("conflicting options: nfs_export=on,index=off\n");
+			return -EINVAL;
+		}
+		if (index_opt) {
+			/*
+			 * There was an explicit index=off that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling nfs_export due to index=off\n");
+			config->nfs_export = false;
+		} else {
+			/* Automatically enable index otherwise. */
+			config->index = true;
+		}
+	}
+
+	/* Resolve nfs_export -> !metacopy dependency */
+	if (config->nfs_export && config->metacopy) {
+		if (nfs_export_opt && metacopy_opt) {
+			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
+			return -EINVAL;
+		}
+		if (metacopy_opt) {
+			/*
+			 * There was an explicit metacopy=on that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling nfs_export due to metacopy=on\n");
+			config->nfs_export = false;
+		} else {
+			/*
+			 * There was an explicit nfs_export=on that resulted
+			 * in this conflict.
+			 */
+			pr_info("disabling metacopy due to nfs_export=on\n");
+			config->metacopy = false;
+		}
+	}
+
 	return 0;
 }
 
-- 
cgit 


From 3011645b5b061e99cf0f024b3260ec506f91b27c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 3 Apr 2020 07:58:49 +0300
Subject: ovl: cleanup non-empty directories in ovl_indexdir_cleanup()

Teach ovl_indexdir_cleanup() to remove temp directories containing
whiteouts to prepare for using index dir instead of work dir for removing
merge directories.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     | 11 -----------
 fs/overlayfs/overlayfs.h |  4 ++--
 fs/overlayfs/readdir.c   | 16 ++++++++++++----
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 0db23baf98e7..723d17744758 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -484,12 +484,6 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 	return upper;
 }
 
-/* Is this a leftover from create/whiteout of directory index entry? */
-static bool ovl_is_temp_index(struct dentry *index)
-{
-	return index->d_name.name[0] == '#';
-}
-
 /*
  * Verify that an index entry name matches the origin file handle stored in
  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
@@ -507,11 +501,6 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	if (!d_inode(index))
 		return 0;
 
-	/* Cleanup leftover from index create/cleanup attempt */
-	err = -ESTALE;
-	if (ovl_is_temp_index(index))
-		goto fail;
-
 	err = -EINVAL;
 	if (index->d_name.len < sizeof(struct ovl_fb)*2)
 		goto fail;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e6f3670146ed..e00b1ff6dea9 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -394,8 +394,8 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
 void ovl_cache_free(struct list_head *list);
 void ovl_dir_cache_free(struct inode *inode);
 int ovl_check_d_type_supported(struct path *realpath);
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
-			 struct dentry *dentry, int level);
+int ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+			struct dentry *dentry, int level);
 int ovl_indexdir_cleanup(struct ovl_fs *ofs);
 
 /* inode.c */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index e452ff7d583d..20f5310d3ee4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1071,14 +1071,13 @@ out:
 	ovl_cache_free(&list);
 }
 
-void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+int ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 			 struct dentry *dentry, int level)
 {
 	int err;
 
 	if (!d_is_dir(dentry) || level > 1) {
-		ovl_cleanup(dir, dentry);
-		return;
+		return ovl_cleanup(dir, dentry);
 	}
 
 	err = ovl_do_rmdir(dir, dentry);
@@ -1088,8 +1087,10 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 		inode_unlock(dir);
 		ovl_workdir_cleanup_recurse(&path, level + 1);
 		inode_lock_nested(dir, I_MUTEX_PARENT);
-		ovl_cleanup(dir, dentry);
+		err = ovl_cleanup(dir, dentry);
 	}
+
+	return err;
 }
 
 int ovl_indexdir_cleanup(struct ovl_fs *ofs)
@@ -1128,6 +1129,13 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 			index = NULL;
 			break;
 		}
+		/* Cleanup leftover from index create/cleanup attempt */
+		if (index->d_name.name[0] == '#') {
+			err = ovl_workdir_cleanup(dir, path.mnt, index, 1);
+			if (err)
+				break;
+			goto next;
+		}
 		err = ovl_verify_index(ofs, index);
 		if (!err) {
 			goto next;
-- 
cgit 


From 773cb4c56b1bedeb5644f5bd06b76e348bb21634 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 3 Apr 2020 08:43:12 +0300
Subject: ovl: prepare to copy up without workdir

With index=on, we copy up lower hardlinks to work dir and move them into
index dir. Fix locking to allow work dir and index dir to be the same
directory.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9709cf22cab3..66004534bd40 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -584,9 +584,10 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 		.link = c->link
 	};
 
-	err = ovl_lock_rename_workdir(c->workdir, c->destdir);
-	if (err)
-		return err;
+	/* workdir and destdir could be the same when copying up to indexdir */
+	err = -EIO;
+	if (lock_rename(c->workdir, c->destdir) != NULL)
+		goto unlock;
 
 	err = ovl_prep_cu_creds(c->dentry, &cc);
 	if (err)
-- 
cgit 


From 62a8a85be8355b01330667c2b4676fb60c184380 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 3 Apr 2020 10:03:38 +0300
Subject: ovl: index dir act as work dir

With index=on, let index dir act as the work dir for copy up and cleanups.
This will help implementing whiteout inode sharing.

We still create the "work" dir on mount regardless of index=on and it is
used to test the features supported by upper fs.  One reason is that before
the feature tests, we do not know if index could be enabled or not.

The reason we do not use "index" directory also as workdir with index=off
is because the existence of the "index" directory acts as a simple
persistent signal that index was enabled on this filesystem and tools may
want to use that signal.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fbd6207acdbf..6e3fa96613f5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1825,17 +1825,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_flags |= SB_RDONLY;
 
 	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
+		/* index dir will act also as workdir */
+		dput(ofs->workdir);
+		ofs->workdir = NULL;
+		iput(ofs->workdir_trap);
+		ofs->workdir_trap = NULL;
+
 		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
 		if (err)
 			goto out_free_oe;
 
 		/* Force r/o mount with no index dir */
-		if (!ofs->indexdir) {
-			dput(ofs->workdir);
-			ofs->workdir = NULL;
+		if (ofs->indexdir)
+			ofs->workdir = dget(ofs->indexdir);
+		else
 			sb->s_flags |= SB_RDONLY;
-		}
-
 	}
 
 	err = ovl_check_overlapping_layers(sb, ofs);
-- 
cgit 


From 32b1924b210a70dcacdf65abd687c5ef86a67541 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 9 Apr 2020 11:29:47 +0300
Subject: ovl: skip overlayfs superblocks at global sync

Stacked filesystems like overlayfs has no own writeback, but they have to
forward syncfs() requests to backend for keeping data integrity.

During global sync() each overlayfs instance calls method ->sync_fs() for
backend although it itself is in global list of superblocks too.  As a
result one syscall sync() could write one superblock several times and send
multiple disk barriers.

This patch adds flag SB_I_SKIP_SYNC into sb->sb_iflags to avoid that.

Reported-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 5 +++--
 fs/sync.c            | 3 ++-
 include/linux/fs.h   | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 6e3fa96613f5..f57aa348dcd6 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -261,8 +261,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 		return 0;
 
 	/*
-	 * If this is a sync(2) call or an emergency sync, all the super blocks
-	 * will be iterated, including upper_sb, so no need to do anything.
+	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
+	 * All the super blocks will be iterated, including upper_sb.
 	 *
 	 * If this is a syncfs(2) call, then we do need to call
 	 * sync_filesystem() on upper_sb, but enough if we do it when being
@@ -1870,6 +1870,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_fs_info = ofs;
 	sb->s_flags |= SB_POSIXACL;
+	sb->s_iflags |= SB_I_SKIP_SYNC;
 
 	err = -ENOMEM;
 	root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
diff --git a/fs/sync.c b/fs/sync.c
index 4d1ff010bc5a..16c2630ee4bf 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -76,7 +76,8 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
 {
-	if (!sb_rdonly(sb) && sb->s_op->sync_fs)
+	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
+	    sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f6f59b4f22a..f186a966a36c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1409,6 +1409,8 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
 #define SB_I_UNTRUSTED_MOUNTER		0x00000040
 
+#define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
+
 /* Possible states of 'frozen' field */
 enum {
 	SB_UNFROZEN = 0,		/* FS is unfrozen */
-- 
cgit 


From 654255fa205cb2b010e9abb34b0c8afcca9c78c7 Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Thu, 23 Apr 2020 19:06:55 +0800
Subject: ovl: inherit SB_NOSEC flag from upperdir

Since the stacking of regular file operations [1], the overlayfs edition of
write_iter() is called when writing regular files.

Since then, xattr lookup is needed on every write since file_remove_privs()
is called from ovl_write_iter(), which would become the performance
bottleneck when writing small chunks of data. In my test case,
file_remove_privs() would consume ~15% CPU when running fstime of unixbench
(the workload is repeadly writing 1 KB to the same file) [2].

Inherit the SB_NOSEC flag from upperdir. Since then xattr lookup would be
done only once on the first write. Unixbench fstime gets a ~20% performance
gain with this patch.

[1] https://lore.kernel.org/lkml/20180606150905.GC9426@magnolia/T/
[2] https://www.spinics.net/lists/linux-unionfs/msg07153.html

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f57aa348dcd6..af69f41f564d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1100,6 +1100,18 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 	ofs->upper_mnt = upper_mnt;
 
+	/*
+	 * Inherit SB_NOSEC flag from upperdir.
+	 *
+	 * This optimization changes behavior when a security related attribute
+	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
+	 * okay because we don't yet have guarantees in that case, but it will
+	 * need careful treatment once we want to honour changes to underlying
+	 * filesystems.
+	 */
+	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
+		sb->s_flags |= SB_NOSEC;
+
 	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
 		ofs->upperdir_locked = true;
 	} else {
-- 
cgit 


From c21c839b8448dd4b1e37ffc1bde928f57d34c0db Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@mykernel.net>
Date: Fri, 24 Apr 2020 10:55:17 +0800
Subject: ovl: whiteout inode sharing

Share inode with different whiteout files for saving inode and speeding up
delete operation.

If EMLINK is encountered when linking a shared whiteout, create a new one.
In case of any other error, disable sharing for this super block.

Note: ofs->whiteout is protected by inode lock on workdir.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       | 49 ++++++++++++++++++++++++++++++++++++------------
 fs/overlayfs/overlayfs.h |  2 +-
 fs/overlayfs/ovl_entry.h |  3 +++
 fs/overlayfs/readdir.c   |  2 +-
 fs/overlayfs/super.c     |  4 ++++
 fs/overlayfs/util.c      |  3 ++-
 6 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 279009dee366..09faa63cf24d 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,59 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 }
 
 /* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
 {
 	int err;
 	struct dentry *whiteout;
+	struct dentry *workdir = ofs->workdir;
 	struct inode *wdir = workdir->d_inode;
 
-	whiteout = ovl_lookup_temp(workdir);
-	if (IS_ERR(whiteout))
-		return whiteout;
+	if (!ofs->whiteout) {
+		whiteout = ovl_lookup_temp(workdir);
+		if (IS_ERR(whiteout))
+			goto out;
 
-	err = ovl_do_whiteout(wdir, whiteout);
-	if (err) {
-		dput(whiteout);
-		whiteout = ERR_PTR(err);
+		err = ovl_do_whiteout(wdir, whiteout);
+		if (err) {
+			dput(whiteout);
+			whiteout = ERR_PTR(err);
+			goto out;
+		}
+		ofs->whiteout = whiteout;
 	}
 
+	if (ofs->share_whiteout) {
+		whiteout = ovl_lookup_temp(workdir);
+		if (IS_ERR(whiteout))
+			goto out;
+
+		err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+		if (!err)
+			goto out;
+
+		if (err != -EMLINK) {
+			pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+				ofs->whiteout->d_inode->i_nlink, err);
+			ofs->share_whiteout = false;
+		}
+		dput(whiteout);
+	}
+	whiteout = ofs->whiteout;
+	ofs->whiteout = NULL;
+out:
 	return whiteout;
 }
 
 /* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
 			     struct dentry *dentry)
 {
-	struct inode *wdir = workdir->d_inode;
+	struct inode *wdir = ofs->workdir->d_inode;
 	struct dentry *whiteout;
 	int err;
 	int flags = 0;
 
-	whiteout = ovl_whiteout(workdir);
+	whiteout = ovl_whiteout(ofs);
 	err = PTR_ERR(whiteout);
 	if (IS_ERR(whiteout))
 		return err;
@@ -715,6 +739,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 static int ovl_remove_and_whiteout(struct dentry *dentry,
 				   struct list_head *list)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct dentry *upper;
@@ -748,7 +773,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 		goto out_dput_upper;
 	}
 
-	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+	err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
 	if (err)
 		goto out_d_drop;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e00b1ff6dea9..76747f5b0517 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -455,7 +455,7 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
 			     struct dentry *dentry);
 struct ovl_cattr {
 	dev_t rdev;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5762d802fe01..a8f82fb7ffb4 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -68,6 +68,7 @@ struct ovl_fs {
 	/* Did we take the inuse lock? */
 	bool upperdir_locked;
 	bool workdir_locked;
+	bool share_whiteout;
 	/* Traps in ovl inode cache */
 	struct inode *upperdir_trap;
 	struct inode *workbasedir_trap;
@@ -77,6 +78,8 @@ struct ovl_fs {
 	int xino_mode;
 	/* For allocation of non-persistent inode numbers */
 	atomic_long_t last_ino;
+	/* Whiteout dentry cache */
+	struct dentry *whiteout;
 };
 
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 20f5310d3ee4..a10b21b562ad 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1154,7 +1154,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 			 * Whiteout orphan index to block future open by
 			 * handle after overlay nlink dropped to zero.
 			 */
-			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+			err = ovl_cleanup_and_whiteout(ofs, dir, index);
 		} else {
 			/* Cleanup orphan index entries */
 			err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index af69f41f564d..a88a7badf444 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -217,6 +217,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	iput(ofs->indexdir_trap);
 	iput(ofs->workdir_trap);
 	iput(ofs->upperdir_trap);
+	dput(ofs->whiteout);
 	dput(ofs->indexdir);
 	dput(ofs->workdir);
 	if (ofs->workdir_locked)
@@ -1776,6 +1777,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!cred)
 		goto out_err;
 
+	/* Is there a reason anyone would want not to share whiteouts? */
+	ofs->share_whiteout = true;
+
 	ofs->config.index = ovl_index_def;
 	ofs->config.nfs_export = ovl_nfs_export_def;
 	ofs->config.xino = ovl_xino_def();
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 36b60788ee47..01755bc186c9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -707,7 +707,8 @@ static void ovl_cleanup_index(struct dentry *dentry)
 		index = NULL;
 	} else if (ovl_index_all(dentry->d_sb)) {
 		/* Whiteout orphan index to block future open by handle */
-		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+					       dir, index);
 	} else {
 		/* Cleanup orphan index entries */
 		err = ovl_cleanup(dir, index);
-- 
cgit 


From 399c109d357a7e217cf7ef551e7e234439c68c15 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@mykernel.net>
Date: Wed, 22 Apr 2020 12:28:43 +0800
Subject: ovl: sync dirty data when remounting to ro mode

sync_filesystem() does not sync dirty data for readonly filesystem during
umount, so before changing to readonly filesystem we should sync dirty data
for data integrity.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a88a7badf444..60dfb27bc12b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -365,11 +365,20 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
+	struct super_block *upper_sb;
+	int ret = 0;
 
 	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
 		return -EROFS;
 
-	return 0;
+	if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
+		upper_sb = ofs->upper_mnt->mnt_sb;
+		down_read(&upper_sb->s_umount);
+		ret = sync_filesystem(upper_sb);
+		up_read(&upper_sb->s_umount);
+	}
+
+	return ret;
 }
 
 static const struct super_operations ovl_super_operations = {
-- 
cgit 


From 144da23beab87b27992e5e1b41bd954de0bf2581 Mon Sep 17 00:00:00 2001
From: Lubos Dolezel <lubos@dolezel.info>
Date: Mon, 4 May 2020 21:35:09 +0200
Subject: ovl: return required buffer size for file handles

Overlayfs doesn't work well with the fanotify mechanism.

Fanotify first probes for the required buffer size for the file handle,
but overlayfs currently bails out without passing the size back.

That results in errors in the kernel log, such as:

[527944.485384] overlayfs: failed to encode file handle (/, err=-75, buflen=0, len=29, type=1)
[527944.485386] fanotify: failed to encode fid (fsid=ae521e68.a434d95f, type=255, bytes=0, err=-2)

Signed-off-by: Lubos Dolezel <lubos@dolezel.info>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index ed5c1078919c..321e24bf5c60 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -231,12 +231,9 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
-	err = -EOVERFLOW;
 	len = OVL_FH_LEN(fh);
-	if (len > buflen)
-		goto fail;
-
-	memcpy(fid, fh, len);
+	if (len <= buflen)
+		memcpy(fid, fh, len);
 	err = len;
 
 out:
@@ -244,9 +241,8 @@ out:
 	return err;
 
 fail:
-	pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
-			    dentry, err, buflen, fh ? (int)fh->fb.len : 0,
-			    fh ? fh->fb.type : 0);
+	pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
+			    dentry, err);
 	goto out;
 }
 
@@ -254,7 +250,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
 			 struct inode *parent)
 {
 	struct dentry *dentry;
-	int bytes = *max_len << 2;
+	int bytes, buflen = *max_len << 2;
 
 	/* TODO: encode connectable file handles */
 	if (parent)
@@ -264,12 +260,14 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
 	if (WARN_ON(!dentry))
 		return FILEID_INVALID;
 
-	bytes = ovl_dentry_to_fid(dentry, fid, bytes);
+	bytes = ovl_dentry_to_fid(dentry, fid, buflen);
 	dput(dentry);
 	if (bytes <= 0)
 		return FILEID_INVALID;
 
 	*max_len = bytes >> 2;
+	if (bytes > buflen)
+		return FILEID_INVALID;
 
 	return OVL_FILEID_V1;
 }
-- 
cgit 


From 522f6e6cba6880a038e2bd88e10390b84cd3febd Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 May 2020 16:21:55 +0300
Subject: ovl: fix out of bounds access warning in ovl_check_fb_len()

syzbot reported out of bounds memory access from open_by_handle_at()
with a crafted file handle that looks like this:

  { .handle_bytes = 2, .handle_type = OVL_FILEID_V1 }

handle_bytes gets rounded down to 0 and we end up calling:
  ovl_check_fh_len(fh, 0) => ovl_check_fb_len(fh + 3, -3)

But fh buffer is only 2 bytes long, so accessing struct ovl_fb at
fh + 3 is illegal.

Fixes: cbe7fba8edfc ("ovl: make sure that real fid is 32bit aligned in memory")
Reported-and-tested-by: syzbot+61958888b1c60361a791@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org> # v5.5
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 76747f5b0517..ffbb57b2d7f6 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -355,6 +355,9 @@ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
 
 static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
 {
+	if (fh_len < sizeof(struct ovl_fh))
+		return -EINVAL;
+
 	return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
 }
 
-- 
cgit 


From 59fb20138a9b5249a4176d5bbc5c670a97343061 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 1 Jun 2020 11:56:50 -0400
Subject: ovl: simplify setting of origin for index lookup

overlayfs can keep index of copied up files and directories and it seems to
serve two primary puroposes.  For regular files, it avoids breaking lower
hardlinks over copy up.  For directories it seems to be used for various
error checks.

During ovl_lookup(), we lookup for index using lower dentry in many a
cases.  That lower dentry is called "origin" and following is a summary of
current logic.

If there is no upperdentry, always lookup for index using lower dentry.
For regular files it helps avoiding breaking hard links over copyup and for
directories it seems to be just error checks.

If there is an upperdentry, then there are 3 possible cases.

 - For directories, lower dentry is found using two ways.  One is regular
  path based lookup in lower layers and second is using ORIGIN xattr on
  upper dentry.  First verify that path based lookup lower dentry matches
  the one pointed by upper ORIGIN xattr.  If yes, use this verified origin
  for index lookup.

 - For regular files (non-metacopy), there is no path based lookup in lower
  layers as lookup stops once we find upper dentry.  So there is no origin
  verification.  If there is ORIGIN xattr present on upper, use that to
  lookup index otherwise don't.

 - For regular metacopy files, again lower dentry is found using path based
  lookup as well as ORIGIN xattr on upper.  Path based lookup is continued
  in this case to find lower data dentry for metacopy upper.  So like
  directories we only use verified origin.  If ORIGIN xattr is not present
  (Either because lower did not support file handles or because this is
  hardlink copied up with index=off), then don't use path lookup based
  lower dentry as origin.  This is same as regular non-metacopy file case.

Suggested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 723d17744758..e6868110d9f1 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -994,25 +994,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 		stack = origin_path;
 		ctr = 1;
+		origin = origin_path->dentry;
 		origin_path = NULL;
 	}
 
 	/*
-	 * Lookup index by lower inode and verify it matches upper inode.
-	 * We only trust dir index if we verified that lower dir matches
-	 * origin, otherwise dir index entries may be inconsistent and we
-	 * ignore them.
+	 * Always lookup index if there is no-upperdentry.
 	 *
-	 * For non-dir upper metacopy dentry, we already set "origin" if we
-	 * verified that lower matched upper origin. If upper origin was
-	 * not present (because lower layer did not support fh encode/decode),
-	 * or indexing is not enabled, do not set "origin" and skip looking up
-	 * index. This case should be handled in same way as a non-dir upper
-	 * without ORIGIN is handled.
+	 * For the case of upperdentry, we have set origin by now if it
+	 * needed to be set. There are basically three cases.
+	 *
+	 * For directories, lookup index by lower inode and verify it matches
+	 * upper inode. We only trust dir index if we verified that lower dir
+	 * matches origin, otherwise dir index entries may be inconsistent
+	 * and we ignore them.
+	 *
+	 * For regular upper, we already set origin if upper had ORIGIN
+	 * xattr. There is no verification though as there is no path
+	 * based dentry lookup in lower in this case.
+	 *
+	 * For metacopy upper, we set a verified origin already if index
+	 * is enabled and if upper had an ORIGIN xattr.
 	 *
-	 * Always lookup index of non-dir non-metacopy and non-upper.
 	 */
-	if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
+	if (!upperdentry && ctr)
 		origin = stack[0].dentry;
 
 	if (origin && ovl_indexdir(dentry->d_sb) &&
-- 
cgit 


From 6815f479ca90ee7fd2e28b2a420f796b974155fe Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 1 Jun 2020 11:56:51 -0400
Subject: ovl: use only uppermetacopy state in ovl_lookup()

Currently we use a variable "metacopy" which signifies that dentry could be
either uppermetacopy or lowermetacopy.  Amir suggested that we can move
code around and use d.metacopy in such a way that we don't need
lowermetacopy and just can do away with uppermetacopy.

So this patch replaces "metacopy" with "uppermetacopy".

It also moves some code little higher to keep reading little simpler.

Suggested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 57 ++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index e6868110d9f1..59363c4f6ed4 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -812,7 +812,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *this;
 	unsigned int i;
 	int err;
-	bool metacopy = false;
+	bool uppermetacopy = false;
 	struct ovl_lookup_data d = {
 		.sb = dentry->d_sb,
 		.name = dentry->d_name,
@@ -858,7 +858,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 				goto out_put_upper;
 
 			if (d.metacopy)
-				metacopy = true;
+				uppermetacopy = true;
 		}
 
 		if (d.redirect) {
@@ -895,6 +895,21 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		if (!this)
 			continue;
 
+		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
+			err = -EPERM;
+			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
+			goto out_put;
+		}
+
+		/*
+		 * Do not store intermediate metacopy dentries in chain,
+		 * except top most lower metacopy dentry
+		 */
+		if (d.metacopy && ctr) {
+			dput(this);
+			continue;
+		}
+
 		/*
 		 * If no origin fh is stored in upper of a merge dir, store fh
 		 * of lower dir and set upper parent "impure".
@@ -929,17 +944,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			origin = this;
 		}
 
-		if (d.metacopy)
-			metacopy = true;
-		/*
-		 * Do not store intermediate metacopy dentries in chain,
-		 * except top most lower metacopy dentry
-		 */
-		if (d.metacopy && ctr) {
-			dput(this);
-			continue;
-		}
-
 		stack[ctr].dentry = this;
 		stack[ctr].layer = lower.layer;
 		ctr++;
@@ -971,22 +975,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 	}
 
-	if (metacopy) {
-		/*
-		 * Found a metacopy dentry but did not find corresponding
-		 * data dentry
-		 */
-		if (d.metacopy) {
-			err = -EIO;
-			goto out_put;
-		}
-
-		err = -EPERM;
-		if (!ofs->config.metacopy) {
-			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
-					    dentry);
-			goto out_put;
-		}
+	/*
+	 * For regular non-metacopy upper dentries, there is no lower
+	 * path based lookup, hence ctr will be zero. If a dentry is found
+	 * using ORIGIN xattr on upper, install it in stack.
+	 *
+	 * For metacopy dentry, path based lookup will find lower dentries.
+	 * Just make sure a corresponding data dentry has been found.
+	 */
+	if (d.metacopy || (uppermetacopy && !ctr)) {
+		err = -EIO;
+		goto out_put;
 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
 		if (WARN_ON(stack != NULL)) {
 			err = -EIO;
-- 
cgit 


From 28166ab3c875b8cbe19b6ad43e29257d1605e3b9 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 1 Jun 2020 11:56:52 -0400
Subject: ovl: initialize OVL_UPPERDATA in ovl_lookup()

Currently ovl_get_inode() initializes OVL_UPPERDATA flag and for that it
has to call ovl_check_metacopy_xattr() and check if metacopy xattr is
present or not.

yangerkun reported sometimes underlying filesystem might return -EIO and in
that case error handling path does not cleanup properly leading to various
warnings.

Run generic/461 with ext4 upper/lower layer sometimes may trigger the bug
as below(linux 4.19):

[  551.001349] overlayfs: failed to get metacopy (-5)
[  551.003464] overlayfs: failed to get inode (-5)
[  551.004243] overlayfs: cleanup of 'd44/fd51' failed (-5)
[  551.004941] overlayfs: failed to get origin (-5)
[  551.005199] ------------[ cut here ]------------
[  551.006697] WARNING: CPU: 3 PID: 24674 at fs/inode.c:1528 iput+0x33b/0x400
...
[  551.027219] Call Trace:
[  551.027623]  ovl_create_object+0x13f/0x170
[  551.028268]  ovl_create+0x27/0x30
[  551.028799]  path_openat+0x1a35/0x1ea0
[  551.029377]  do_filp_open+0xad/0x160
[  551.029944]  ? vfs_writev+0xe9/0x170
[  551.030499]  ? page_counter_try_charge+0x77/0x120
[  551.031245]  ? __alloc_fd+0x160/0x2a0
[  551.031832]  ? do_sys_open+0x189/0x340
[  551.032417]  ? get_unused_fd_flags+0x34/0x40
[  551.033081]  do_sys_open+0x189/0x340
[  551.033632]  __x64_sys_creat+0x24/0x30
[  551.034219]  do_syscall_64+0xd5/0x430
[  551.034800]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

One solution is to improve error handling and call iget_failed() if error
is encountered.  Amir thinks that this path is little intricate and there
is not real need to check and initialize OVL_UPPERDATA in ovl_get_inode().
Instead caller of ovl_get_inode() can initialize this state.  And this will
avoid double checking of metacopy xattr lookup in ovl_lookup() and
ovl_get_inode().

OVL_UPPERDATA is inode flag.  So I was little concerned that initializing
it outside ovl_get_inode() might have some races.  But this is one way
transition.  That is once a file has been fully copied up, it can't go back
to metacopy file again.  And that seems to help avoid races.  So as of now
I can't see any races w.r.t OVL_UPPERDATA being set wrongly.  So move
settingof OVL_UPPERDATA inside the callers of ovl_get_inode().
ovl_obtain_alias() already does it.  So only two callers now left are
ovl_lookup() and ovl_instantiate().

Reported-by: yangerkun <yangerkun@huawei.com>
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c   |  2 ++
 fs/overlayfs/inode.c | 11 +----------
 fs/overlayfs/namei.c |  2 ++
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 09faa63cf24d..1bba4813f9cb 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -286,6 +286,8 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
 		inode = ovl_get_inode(dentry->d_sb, &oip);
 		if (IS_ERR(inode))
 			return PTR_ERR(inode);
+		if (inode == oip.newinode)
+			ovl_set_flag(OVL_UPPERDATA, inode);
 	} else {
 		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
 		dput(newdentry);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 981f11ec51bc..f2aaf00821c0 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -957,7 +957,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
 	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
 					oip->index);
 	int fsid = bylower ? lowerpath->layer->fsid : 0;
-	bool is_dir, metacopy = false;
+	bool is_dir;
 	unsigned long ino = 0;
 	int err = oip->newinode ? -EEXIST : -ENOMEM;
 
@@ -1018,15 +1018,6 @@ struct inode *ovl_get_inode(struct super_block *sb,
 	if (oip->index)
 		ovl_set_flag(OVL_INDEX, inode);
 
-	if (upperdentry) {
-		err = ovl_check_metacopy_xattr(upperdentry);
-		if (err < 0)
-			goto out_err;
-		metacopy = err;
-		if (!metacopy)
-			ovl_set_flag(OVL_UPPERDATA, inode);
-	}
-
 	OVL_I(inode)->redirect = oip->redirect;
 
 	if (bylower)
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 59363c4f6ed4..1687a770ff39 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -1067,6 +1067,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		err = PTR_ERR(inode);
 		if (IS_ERR(inode))
 			goto out_free_oe;
+		if (upperdentry && !uppermetacopy)
+			ovl_set_flag(OVL_UPPERDATA, inode);
 	}
 
 	ovl_dentry_update_reval(dentry, upperdentry,
-- 
cgit 


From 21d8d66abffb0c7834c1b09b8b043ea6a66d6089 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Tue, 2 Jun 2020 11:23:38 -0400
Subject: ovl: fix redirect traversal on metacopy dentries

Amir pointed me to metacopy test cases in unionmount-testsuite and I
decided to run "./run --ov=10 --meta" and it failed while running test
"rename-mass-5.py".

Problem is w.r.t absolute redirect traversal on intermediate metacopy
dentry.  We do not store intermediate metacopy dentries and also skip
current loop/layer and move onto lookup in next layer.  But at the end of
loop, we have logic to reset "poe" and layer index if currnently looked up
dentry has absolute redirect.  We skip all that and that means lookup in
next layer will fail.

Following is simple test case to reproduce this.

- mkdir -p lower upper work merged lower/a lower/b
- touch lower/a/foo.txt
- mount -t overlay -o lowerdir=lower,upperdir=upper,workdir=work,metacopy=on none merged

# Following will create absolute redirect "/a/foo.txt" on upper/b/bar.txt.
- mv merged/a/foo.txt merged/b/bar.txt

# unmount overlay and use upper as lower layer (lower2) for next mount.
- umount merged
- mv upper lower2
- rm -rf work; mkdir -p upper work
- mount -t overlay -o lowerdir=lower2:lower,upperdir=upper,workdir=work,metacopy=on none merged

# Force a metacopy copy-up
- chown bin:bin merged/b/bar.txt

# unmount overlay and use upper as lower layer (lower3) for next mount.
- umount merged
- mv upper lower3
- rm -rf work; mkdir -p upper work
- mount -t overlay -o lowerdir=lower3:lower2:lower,upperdir=upper,workdir=work,metacopy=on none merged

# ls merged/b/bar.txt
ls: cannot access 'bar.txt': Input/output error

Intermediate lower layer (lower2) has metacopy dentry b/bar.txt with
absolute redirect "/a/foo.txt".  We skipped redirect processing at the end
of loop which sets poe to roe and sets the appropriate next lower layer
index.  And that means lookup failed in next layer.

Fix this by continuing the loop for any intermediate dentries.  We still do
not save these at lower stack.  With this fix applied unionmount-testsuite,
"./run --ov-10 --meta" now passes.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 1687a770ff39..f9640d27ddab 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -901,15 +901,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			goto out_put;
 		}
 
-		/*
-		 * Do not store intermediate metacopy dentries in chain,
-		 * except top most lower metacopy dentry
-		 */
-		if (d.metacopy && ctr) {
-			dput(this);
-			continue;
-		}
-
 		/*
 		 * If no origin fh is stored in upper of a merge dir, store fh
 		 * of lower dir and set upper parent "impure".
@@ -944,9 +935,20 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			origin = this;
 		}
 
-		stack[ctr].dentry = this;
-		stack[ctr].layer = lower.layer;
-		ctr++;
+		if (d.metacopy && ctr) {
+			/*
+			 * Do not store intermediate metacopy dentries in
+			 * lower chain, except top most lower metacopy dentry.
+			 * Continue the loop so that if there is an absolute
+			 * redirect on this dentry, poe can be reset to roe.
+			 */
+			dput(this);
+			this = NULL;
+		} else {
+			stack[ctr].dentry = this;
+			stack[ctr].layer = lower.layer;
+			ctr++;
+		}
 
 		/*
 		 * Following redirects can have security consequences: it's like
-- 
cgit 


From 130fdbc3d1f9966dd4230709c30f3768bccd3065 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 2 Jun 2020 22:20:25 +0200
Subject: ovl: pass correct flags for opening real directory

The three instances of ovl_path_open() in overlayfs/readdir.c do three
different things:

 - pass f_flags from overlay file
 - pass O_RDONLY | O_DIRECTORY
 - pass just O_RDONLY

The value of f_flags can be (other than O_RDONLY):

O_WRONLY	- not possible for a directory
O_RDWR		- not possible for a directory
O_CREAT		- masked out by dentry_open()
O_EXCL		- masked out by dentry_open()
O_NOCTTY	- masked out by dentry_open()
O_TRUNC		- masked out by dentry_open()
O_APPEND	- no effect on directory ops
O_NDELAY	- no effect on directory ops
O_NONBLOCK	- no effect on directory ops
__O_SYNC	- no effect on directory ops
O_DSYNC		- no effect on directory ops
FASYNC		- no effect on directory ops
O_DIRECT	- no effect on directory ops
O_LARGEFILE	- ?
O_DIRECTORY	- only affects lookup
O_NOFOLLOW	- only affects lookup
O_NOATIME	- overlay sets this unconditionally in ovl_path_open()
O_CLOEXEC	- only affects fd allocation
O_PATH		- no effect on directory ops
__O_TMPFILE	- not possible for a directory


Fon non-merge directories we use the underlying filesystem's iterate; in
this case honor O_LARGEFILE from the original file to make sure that open
doesn't get rejected.

For merge directories it's safe to pass O_LARGEFILE unconditionally since
userspace will only see the artificial offsets created by overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/readdir.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index a10b21b562ad..3bc97065cc70 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -297,7 +297,7 @@ static inline int ovl_dir_read(struct path *realpath,
 	struct file *realfile;
 	int err;
 
-	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
 	if (IS_ERR(realfile))
 		return PTR_ERR(realfile);
 
@@ -831,6 +831,12 @@ out_unlock:
 	return res;
 }
 
+static struct file *ovl_dir_open_realfile(struct file *file,
+					  struct path *realpath)
+{
+	return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+}
+
 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 			 int datasync)
 {
@@ -853,7 +859,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 			struct path upperpath;
 
 			ovl_path_upper(dentry, &upperpath);
-			realfile = ovl_path_open(&upperpath, O_RDONLY);
+			realfile = ovl_dir_open_realfile(file, &upperpath);
 
 			inode_lock(inode);
 			if (!od->upperfile) {
@@ -904,7 +910,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	type = ovl_path_real(file->f_path.dentry, &realpath);
-	realfile = ovl_path_open(&realpath, file->f_flags);
+	realfile = ovl_dir_open_realfile(file, &realpath);
 	if (IS_ERR(realfile)) {
 		kfree(od);
 		return PTR_ERR(realfile);
-- 
cgit 


From 48bd024b8a40d73ad6b086de2615738da0c7004f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 2 Jun 2020 22:20:25 +0200
Subject: ovl: switch to mounter creds in readdir

In preparation for more permission checking, override credentials for
directory operations on the underlying filesystems.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/readdir.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 3bc97065cc70..1621460417be 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -743,8 +743,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 	struct ovl_dir_file *od = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
 	struct ovl_cache_entry *p;
+	const struct cred *old_cred;
 	int err;
 
+	old_cred = ovl_override_creds(dentry->d_sb);
 	if (!ctx->pos)
 		ovl_dir_reset(file);
 
@@ -758,17 +760,20 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 		    (ovl_same_fs(dentry->d_sb) &&
 		     (ovl_is_impure_dir(file) ||
 		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
-			return ovl_iterate_real(file, ctx);
+			err = ovl_iterate_real(file, ctx);
+		} else {
+			err = iterate_dir(od->realfile, ctx);
 		}
-		return iterate_dir(od->realfile, ctx);
+		goto out;
 	}
 
 	if (!od->cache) {
 		struct ovl_dir_cache *cache;
 
 		cache = ovl_cache_get(dentry);
+		err = PTR_ERR(cache);
 		if (IS_ERR(cache))
-			return PTR_ERR(cache);
+			goto out;
 
 		od->cache = cache;
 		ovl_seek_cursor(od, ctx->pos);
@@ -780,7 +785,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 			if (!p->ino) {
 				err = ovl_cache_update_ino(&file->f_path, p);
 				if (err)
-					return err;
+					goto out;
 			}
 			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
 				break;
@@ -788,7 +793,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 		od->cursor = p->l_node.next;
 		ctx->pos++;
 	}
-	return 0;
+	err = 0;
+out:
+	revert_creds(old_cred);
+	return err;
 }
 
 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -834,7 +842,14 @@ out_unlock:
 static struct file *ovl_dir_open_realfile(struct file *file,
 					  struct path *realpath)
 {
-	return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+	struct file *res;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
+	revert_creds(old_cred);
+
+	return res;
 }
 
 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
-- 
cgit 


From 56230d956739b9cb1cbde439d76227d77979a04d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 2 Jun 2020 22:20:26 +0200
Subject: ovl: verify permissions in ovl_path_open()

Check permission before opening a real file.

ovl_path_open() is used by readdir and copy-up routines.

ovl_permission() theoretically already checked copy up permissions, but it
doesn't hurt to re-do these checks during the actual copy-up.

For directory reading ovl_permission() only checks access to topmost
underlying layer.  Readdir on a merged directory accesses layers below the
topmost one as well.  Permission wasn't checked for these layers.

Note: modifying ovl_permission() to perform this check would be far more
complex and hence more bug prone.  The result is less precise permissions
returned in access(2).  If this turns out to be an issue, we can revisit
this bug.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/util.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 01755bc186c9..33a7c60be3ed 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -459,7 +459,32 @@ bool ovl_is_whiteout(struct dentry *dentry)
 
 struct file *ovl_path_open(struct path *path, int flags)
 {
-	return dentry_open(path, flags | O_NOATIME, current_cred());
+	struct inode *inode = d_inode(path->dentry);
+	int err, acc_mode;
+
+	if (flags & ~(O_ACCMODE | O_LARGEFILE))
+		BUG();
+
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		acc_mode = MAY_READ;
+		break;
+	case O_WRONLY:
+		acc_mode = MAY_WRITE;
+		break;
+	default:
+		BUG();
+	}
+
+	err = inode_permission(inode, acc_mode | MAY_OPEN);
+	if (err)
+		return ERR_PTR(err);
+
+	/* O_NOATIME is an optimization, don't fail if not permitted */
+	if (inode_owner_or_capable(inode))
+		flags |= O_NOATIME;
+
+	return dentry_open(path, flags, current_cred());
 }
 
 /* Caller should hold ovl_inode->lock */
-- 
cgit 


From 292f902a40c11f043a5ca1305a114da0e523eaa3 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 2 Jun 2020 22:20:26 +0200
Subject: ovl: call secutiry hook in ovl_real_ioctl()

Verify LSM permissions for underlying file, since vfs_ioctl() doesn't do
it.

[Stephen Rothwell] export security_file_ioctl

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/file.c | 5 ++++-
 security/security.c | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 87c362f65448..1860e220c82d 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -10,6 +10,7 @@
 #include <linux/uio.h>
 #include <linux/uaccess.h>
 #include <linux/splice.h>
+#include <linux/security.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include "overlayfs.h"
@@ -520,7 +521,9 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
 		return ret;
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = vfs_ioctl(real.file, cmd, arg);
+	ret = security_file_ioctl(real.file, cmd, arg);
+	if (!ret)
+		ret = vfs_ioctl(real.file, cmd, arg);
 	revert_creds(old_cred);
 
 	fdput(real);
diff --git a/security/security.c b/security/security.c
index 7fed24b9d57e..a67414105130 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1459,6 +1459,7 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	return call_int_hook(file_ioctl, 0, file, cmd, arg);
 }
+EXPORT_SYMBOL_GPL(security_file_ioctl);
 
 static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
 {
-- 
cgit 


From 05acefb4872dae89e772729efb194af754c877e8 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 2 Jun 2020 22:20:26 +0200
Subject: ovl: check permission to open real file

Call inode_permission() on real inode before opening regular file on one of
the underlying layers.

In some cases ovl_permission() already checks access to an underlying file,
but it misses the metacopy case, and possibly other ones as well.

Removing the redundant permission check from ovl_permission() should be
considered later.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/file.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 1860e220c82d..0f83c8dfec46 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -40,10 +40,22 @@ static struct file *ovl_open_realfile(const struct file *file,
 	struct file *realfile;
 	const struct cred *old_cred;
 	int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
+	int acc_mode = ACC_MODE(flags);
+	int err;
+
+	if (flags & O_APPEND)
+		acc_mode |= MAY_APPEND;
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	realfile = open_with_fake_path(&file->f_path, flags, realinode,
-				       current_cred());
+	err = inode_permission(realinode, MAY_OPEN | acc_mode);
+	if (err) {
+		realfile = ERR_PTR(err);
+	} else if (!inode_owner_or_capable(realinode)) {
+		realfile = ERR_PTR(-EPERM);
+	} else {
+		realfile = open_with_fake_path(&file->f_path, flags, realinode,
+					       current_cred());
+	}
 	revert_creds(old_cred);
 
 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
-- 
cgit 


From 1434a65ea625c51317ccdf06dabf4bd27d20fa10 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@mykernel.net>
Date: Tue, 26 May 2020 09:35:57 +0800
Subject: ovl: drop negative dentry in upper layer

Negative dentries of upper layer are useless after construction of
overlayfs' own dentry and may keep in the memory long time even after
unmount of overlayfs instance. This patch tries to drop unnecessary
negative dentry of upper layer to effectively reclaim memory.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f9640d27ddab..5dc19dcea5a4 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -191,16 +191,36 @@ static bool ovl_is_opaquedir(struct dentry *dentry)
 	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
 }
 
+static struct dentry *ovl_lookup_positive_unlocked(const char *name,
+						   struct dentry *base, int len,
+						   bool drop_negative)
+{
+	struct dentry *ret = lookup_one_len_unlocked(name, base, len);
+
+	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+		if (drop_negative && ret->d_lockref.count == 1) {
+			spin_lock(&ret->d_lock);
+			/* Recheck condition under lock */
+			if (d_is_negative(ret) && ret->d_lockref.count == 1)
+				__d_drop(ret);
+			spin_unlock(&ret->d_lock);
+		}
+		dput(ret);
+		ret = ERR_PTR(-ENOENT);
+	}
+	return ret;
+}
+
 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 			     const char *name, unsigned int namelen,
 			     size_t prelen, const char *post,
-			     struct dentry **ret)
+			     struct dentry **ret, bool drop_negative)
 {
 	struct dentry *this;
 	int err;
 	bool last_element = !post[0];
 
-	this = lookup_positive_unlocked(name, base, namelen);
+	this = ovl_lookup_positive_unlocked(name, base, namelen, drop_negative);
 	if (IS_ERR(this)) {
 		err = PTR_ERR(this);
 		this = NULL;
@@ -276,7 +296,7 @@ out_err:
 }
 
 static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
-			    struct dentry **ret)
+			    struct dentry **ret, bool drop_negative)
 {
 	/* Counting down from the end, since the prefix can change */
 	size_t rem = d->name.len - 1;
@@ -285,7 +305,7 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 
 	if (d->name.name[0] != '/')
 		return ovl_lookup_single(base, d, d->name.name, d->name.len,
-					 0, "", ret);
+					 0, "", ret, drop_negative);
 
 	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
 		const char *s = d->name.name + d->name.len - rem;
@@ -298,7 +318,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 			return -EIO;
 
 		err = ovl_lookup_single(base, d, s, thislen,
-					d->name.len - rem, next, &base);
+					d->name.len - rem, next, &base,
+					drop_negative);
 		dput(dentry);
 		if (err)
 			return err;
@@ -830,7 +851,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	old_cred = ovl_override_creds(dentry->d_sb);
 	upperdir = ovl_dentry_upper(dentry->d_parent);
 	if (upperdir) {
-		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
+		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
 		if (err)
 			goto out;
 
@@ -888,7 +909,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		else
 			d.last = lower.layer->idx == roe->numlower;
 
-		err = ovl_lookup_layer(lower.dentry, &d, &this);
+		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
 		if (err)
 			goto out_put;
 
-- 
cgit 


From 520da69d265a91c6536c63851cbb8a53946974f0 Mon Sep 17 00:00:00 2001
From: Yuxuan Shui <yshuiv7@gmail.com>
Date: Wed, 27 May 2020 04:08:02 +0100
Subject: ovl: initialize error in ovl_copy_xattr

In ovl_copy_xattr, if all the xattrs to be copied are overlayfs private
xattrs, the copy loop will terminate without assigning anything to the
error variable, thus returning an uninitialized value.

If ovl_copy_xattr is called from ovl_clear_empty, this uninitialized error
value is put into a pointer by ERR_PTR(), causing potential invalid memory
accesses down the line.

This commit initialize error with 0. This is the correct value because when
there's no xattr to copy, because all xattrs are private, ovl_copy_xattr
should succeed.

This bug is discovered with the help of INIT_STACK_ALL and clang.

Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1050405
Fixes: 0956254a2d5b ("ovl: don't copy up opaqueness")
Cc: stable@vger.kernel.org # v4.8
Signed-off-by: Alexander Potapenko <glider@google.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 66004534bd40..79dd052c7dbf 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -47,7 +47,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 {
 	ssize_t list_size, size, value_size = 0;
 	char *buf, *name, *value = NULL;
-	int uninitialized_var(error);
+	int error = 0;
 	size_t slen;
 
 	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
-- 
cgit 


From 08f4c7c86d4cf125389dce9d94423024549f9b02 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 4 Jun 2020 10:48:19 +0200
Subject: ovl: add accessor for ofs->upper_mnt

Next patch will remove ofs->upper_mnt, so add an accessor function for this
field.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    |  6 +++---
 fs/overlayfs/inode.c     |  4 ++--
 fs/overlayfs/namei.c     |  2 +-
 fs/overlayfs/ovl_entry.h |  5 +++++
 fs/overlayfs/readdir.c   |  2 +-
 fs/overlayfs/super.c     | 40 ++++++++++++++++++++--------------------
 fs/overlayfs/util.c      |  6 +++---
 7 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 321e24bf5c60..8f4286450f92 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -204,7 +204,7 @@ static int ovl_check_encode_origin(struct dentry *dentry)
 	 * ovl_connect_layer() will try to make origin's layer "connected" by
 	 * copying up a "connectable" ancestor.
 	 */
-	if (d_is_dir(dentry) && ofs->upper_mnt)
+	if (d_is_dir(dentry) && ovl_upper_mnt(ofs))
 		return ovl_connect_layer(dentry);
 
 	/* Lower file handle for indexed and non-upper dir/non-dir */
@@ -677,10 +677,10 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	struct dentry *dentry;
 	struct dentry *upper;
 
-	if (!ofs->upper_mnt)
+	if (!ovl_upper_mnt(ofs))
 		return ERR_PTR(-EACCES);
 
-	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
 	if (IS_ERR_OR_NULL(upper))
 		return upper;
 
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f2aaf00821c0..5148c63bbf4e 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -456,7 +456,7 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
 	if (flags & S_ATIME) {
 		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
 		struct path upperpath = {
-			.mnt = ofs->upper_mnt,
+			.mnt = ovl_upper_mnt(ofs),
 			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
 		};
 
@@ -921,7 +921,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
 		return true;
 
 	/* Yes, if won't be copied up */
-	if (!ofs->upper_mnt)
+	if (!ovl_upper_mnt(ofs))
 		return true;
 
 	/* No, if lower hardlink is or will be broken on copy up */
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 5dc19dcea5a4..3566282a9199 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -489,7 +489,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 	if (IS_ERR_OR_NULL(fh))
 		return ERR_CAST(fh);
 
-	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
+	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
 	kfree(fh);
 
 	if (IS_ERR_OR_NULL(upper))
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index a8f82fb7ffb4..2da0ff355824 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -82,6 +82,11 @@ struct ovl_fs {
 	struct dentry *whiteout;
 };
 
+static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
+{
+	return ofs->upper_mnt;
+}
+
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
 {
 	return (struct ovl_fs *)sb->s_fs_info;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 1621460417be..6918b98faeb6 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1120,7 +1120,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 	struct dentry *indexdir = ofs->indexdir;
 	struct dentry *index = NULL;
 	struct inode *dir = indexdir->d_inode;
-	struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
+	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
 	LIST_HEAD(list);
 	struct rb_root root = RB_ROOT;
 	struct ovl_cache_entry *p;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 60dfb27bc12b..201be5024494 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -224,7 +224,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 		ovl_inuse_unlock(ofs->workbasedir);
 	dput(ofs->workbasedir);
 	if (ofs->upperdir_locked)
-		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
+		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
 	mntput(ofs->upper_mnt);
 	for (i = 1; i < ofs->numlayer; i++) {
 		iput(ofs->layers[i].trap);
@@ -258,7 +258,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 	struct super_block *upper_sb;
 	int ret;
 
-	if (!ofs->upper_mnt)
+	if (!ovl_upper_mnt(ofs))
 		return 0;
 
 	/*
@@ -272,7 +272,7 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 	if (!wait)
 		return 0;
 
-	upper_sb = ofs->upper_mnt->mnt_sb;
+	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 
 	down_read(&upper_sb->s_umount);
 	ret = sync_filesystem(upper_sb);
@@ -310,7 +310,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 /* Will this overlay be forced to mount/remount ro? */
 static bool ovl_force_readonly(struct ovl_fs *ofs)
 {
-	return (!ofs->upper_mnt || !ofs->workdir);
+	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
 }
 
 static const char *ovl_redirect_mode_def(void)
@@ -372,7 +372,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 		return -EROFS;
 
 	if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
-		upper_sb = ofs->upper_mnt->mnt_sb;
+		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 		down_read(&upper_sb->s_umount);
 		ret = sync_filesystem(upper_sb);
 		up_read(&upper_sb->s_umount);
@@ -669,7 +669,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 					 const char *name, bool persist)
 {
 	struct inode *dir =  ofs->workbasedir->d_inode;
-	struct vfsmount *mnt = ofs->upper_mnt;
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 	struct dentry *work;
 	int err;
 	bool retried = false;
@@ -1122,7 +1122,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
 		sb->s_flags |= SB_NOSEC;
 
-	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
+	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
 		ofs->upperdir_locked = true;
 	} else {
 		err = ovl_report_in_use(ofs, "upperdir");
@@ -1198,7 +1198,7 @@ out_unlock:
 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 			    struct path *workpath)
 {
-	struct vfsmount *mnt = ofs->upper_mnt;
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 	struct dentry *temp;
 	bool rename_whiteout;
 	bool d_type;
@@ -1342,7 +1342,7 @@ out:
 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
 			    struct ovl_entry *oe, struct path *upperpath)
 {
-	struct vfsmount *mnt = ofs->upper_mnt;
+	struct vfsmount *mnt = ovl_upper_mnt(ofs);
 	int err;
 
 	err = mnt_want_write(mnt);
@@ -1398,7 +1398,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
 {
 	unsigned int i;
 
-	if (!ofs->config.nfs_export && !ofs->upper_mnt)
+	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
 		return true;
 
 	for (i = 0; i < ofs->numfs; i++) {
@@ -1477,7 +1477,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
 	ofs->numfs++;
 
-	layers[0].mnt = ofs->upper_mnt;
+	layers[0].mnt = ovl_upper_mnt(ofs);
 	layers[0].idx = 0;
 	layers[0].fsid = 0;
 	ofs->numlayer = 1;
@@ -1494,8 +1494,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 		goto out;
 	}
 
-	if (ofs->upper_mnt) {
-		ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
+	if (ovl_upper_mnt(ofs)) {
+		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
 		ofs->fs[0].is_lower = false;
 	}
 
@@ -1550,7 +1550,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 	 * inode number or a non persistent inode number allocated from a
 	 * dedicated range.
 	 */
-	if (ofs->numfs - !ofs->upper_mnt == 1) {
+	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
 		if (ofs->config.xino == OVL_XINO_ON)
 			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
 		ofs->xino_mode = 0;
@@ -1699,8 +1699,8 @@ static int ovl_check_overlapping_layers(struct super_block *sb,
 {
 	int i, err;
 
-	if (ofs->upper_mnt) {
-		err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
+	if (ovl_upper_mnt(ofs)) {
+		err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
 				      "upperdir");
 		if (err)
 			return err;
@@ -1836,8 +1836,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (!ofs->workdir)
 			sb->s_flags |= SB_RDONLY;
 
-		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
-		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
+		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
+		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
 
 	}
 	oe = ovl_get_lowerstack(sb, ofs);
@@ -1846,7 +1846,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_err;
 
 	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
-	if (!ofs->upper_mnt)
+	if (!ovl_upper_mnt(ofs))
 		sb->s_flags |= SB_RDONLY;
 
 	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
@@ -1874,7 +1874,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	/* Show index=off in /proc/mounts for forced r/o mount */
 	if (!ofs->indexdir) {
 		ofs->config.index = false;
-		if (ofs->upper_mnt && ofs->config.nfs_export) {
+		if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
 			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
 			ofs->config.nfs_export = false;
 		}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 33a7c60be3ed..56c1f89f20c9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -18,13 +18,13 @@
 int ovl_want_write(struct dentry *dentry)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	return mnt_want_write(ofs->upper_mnt);
+	return mnt_want_write(ovl_upper_mnt(ofs));
 }
 
 void ovl_drop_write(struct dentry *dentry)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	mnt_drop_write(ofs->upper_mnt);
+	mnt_drop_write(ovl_upper_mnt(ofs));
 }
 
 struct dentry *ovl_workdir(struct dentry *dentry)
@@ -150,7 +150,7 @@ void ovl_path_upper(struct dentry *dentry, struct path *path)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 
-	path->mnt = ofs->upper_mnt;
+	path->mnt = ovl_upper_mnt(ofs);
 	path->dentry = ovl_dentry_upper(dentry);
 }
 
-- 
cgit 


From b8e42a651bdee06202ebdd96cff64fdeabd5b1d6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 4 Jun 2020 10:48:19 +0200
Subject: ovl: get rid of redundant members in struct ovl_fs

ofs->upper_mnt is copied to ->layers[0].mnt and ->layers[0].trap could be
used instead of a separate ->upperdir_trap.

Split the lowerdir option early to get the number of layers, then allocate
the ->layers array, and finally fill the upper and lower layers, as before.

Get rid of path_put_init() in ovl_lower_dir(), since the only caller will
take care of that.

[Colin Ian King] Fix null pointer dereference on null stack pointer on
error return found by Coverity.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/ovl_entry.h |  4 +-
 fs/overlayfs/super.c     | 99 +++++++++++++++++++++++-------------------------
 2 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 2da0ff355824..b429c80879ee 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -46,7 +46,6 @@ struct ovl_path {
 
 /* private information held for overlayfs's superblock */
 struct ovl_fs {
-	struct vfsmount *upper_mnt;
 	unsigned int numlayer;
 	/* Number of unique fs among layers including upper fs */
 	unsigned int numfs;
@@ -70,7 +69,6 @@ struct ovl_fs {
 	bool workdir_locked;
 	bool share_whiteout;
 	/* Traps in ovl inode cache */
-	struct inode *upperdir_trap;
 	struct inode *workbasedir_trap;
 	struct inode *workdir_trap;
 	struct inode *indexdir_trap;
@@ -84,7 +82,7 @@ struct ovl_fs {
 
 static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
 {
-	return ofs->upper_mnt;
+	return ofs->layers[0].mnt;
 }
 
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 201be5024494..eb81d8760a6a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -216,7 +216,6 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	iput(ofs->workbasedir_trap);
 	iput(ofs->indexdir_trap);
 	iput(ofs->workdir_trap);
-	iput(ofs->upperdir_trap);
 	dput(ofs->whiteout);
 	dput(ofs->indexdir);
 	dput(ofs->workdir);
@@ -225,8 +224,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	dput(ofs->workbasedir);
 	if (ofs->upperdir_locked)
 		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
-	mntput(ofs->upper_mnt);
-	for (i = 1; i < ofs->numlayer; i++) {
+	for (i = 0; i < ofs->numlayer; i++) {
 		iput(ofs->layers[i].trap);
 		mntput(ofs->layers[i].mnt);
 	}
@@ -837,11 +835,11 @@ static int ovl_lower_dir(const char *name, struct path *path,
 
 	err = ovl_mount_dir_noesc(name, path);
 	if (err)
-		goto out;
+		return err;
 
 	err = ovl_check_namelen(path, ofs, name);
 	if (err)
-		goto out_put;
+		return err;
 
 	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 
@@ -863,11 +861,6 @@ static int ovl_lower_dir(const char *name, struct path *path,
 		ofs->xino_mode = -1;
 
 	return 0;
-
-out_put:
-	path_put_init(path);
-out:
-	return err;
 }
 
 /* Workdir should not be subdir of upperdir and vice versa */
@@ -1074,7 +1067,7 @@ static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
 }
 
 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
-			 struct path *upperpath)
+			 struct ovl_layer *upper_layer, struct path *upperpath)
 {
 	struct vfsmount *upper_mnt;
 	int err;
@@ -1094,7 +1087,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 	if (err)
 		goto out;
 
-	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
+	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
 			     "upperdir");
 	if (err)
 		goto out;
@@ -1108,7 +1101,9 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
 
 	/* Don't inherit atime flags */
 	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
-	ofs->upper_mnt = upper_mnt;
+	upper_layer->mnt = upper_mnt;
+	upper_layer->idx = 0;
+	upper_layer->fsid = 0;
 
 	/*
 	 * Inherit SB_NOSEC flag from upperdir.
@@ -1458,18 +1453,13 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 }
 
 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
-			  struct path *stack, unsigned int numlower)
+			  struct path *stack, unsigned int numlower,
+			  struct ovl_layer *layers)
 {
 	int err;
 	unsigned int i;
-	struct ovl_layer *layers;
 
 	err = -ENOMEM;
-	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
-	if (!layers)
-		goto out;
-	ofs->layers = layers;
-
 	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
 	if (ofs->fs == NULL)
 		goto out;
@@ -1477,11 +1467,6 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
 	ofs->numfs++;
 
-	layers[0].mnt = ovl_upper_mnt(ofs);
-	layers[0].idx = 0;
-	layers[0].fsid = 0;
-	ofs->numlayer = 1;
-
 	/*
 	 * All lower layers that share the same fs as upper layer, use the same
 	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
@@ -1579,44 +1564,30 @@ out:
 }
 
 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
-					    struct ovl_fs *ofs)
+				const char *lower, unsigned int numlower,
+				struct ovl_fs *ofs, struct ovl_layer *layers)
 {
 	int err;
-	char *lowertmp, *lower;
 	struct path *stack = NULL;
-	unsigned int stacklen, numlower = 0, i;
+	unsigned int i;
 	struct ovl_entry *oe;
 
-	err = -ENOMEM;
-	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
-	if (!lowertmp)
-		goto out_err;
-
-	err = -EINVAL;
-	stacklen = ovl_split_lowerdirs(lowertmp);
-	if (stacklen > OVL_MAX_STACK) {
-		pr_err("too many lower directories, limit is %d\n",
-		       OVL_MAX_STACK);
-		goto out_err;
-	} else if (!ofs->config.upperdir && stacklen == 1) {
+	if (!ofs->config.upperdir && numlower == 1) {
 		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
-		goto out_err;
+		return ERR_PTR(-EINVAL);
 	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
 		   ofs->config.redirect_follow) {
 		pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
 		ofs->config.nfs_export = false;
 	}
 
-	err = -ENOMEM;
-	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
+	stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
 	if (!stack)
-		goto out_err;
+		return ERR_PTR(-ENOMEM);
 
 	err = -EINVAL;
-	lower = lowertmp;
-	for (numlower = 0; numlower < stacklen; numlower++) {
-		err = ovl_lower_dir(lower, &stack[numlower], ofs,
-				    &sb->s_stack_depth);
+	for (i = 0; i < numlower; i++) {
+		err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
 		if (err)
 			goto out_err;
 
@@ -1630,7 +1601,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
 		goto out_err;
 	}
 
-	err = ovl_get_layers(sb, ofs, stack, numlower);
+	err = ovl_get_layers(sb, ofs, stack, numlower, layers);
 	if (err)
 		goto out_err;
 
@@ -1648,7 +1619,6 @@ out:
 	for (i = 0; i < numlower; i++)
 		path_put(&stack[i]);
 	kfree(stack);
-	kfree(lowertmp);
 
 	return oe;
 
@@ -1772,7 +1742,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	struct dentry *root_dentry;
 	struct ovl_entry *oe;
 	struct ovl_fs *ofs;
+	struct ovl_layer *layers;
 	struct cred *cred;
+	char *splitlower = NULL;
+	unsigned int numlower;
 	int err;
 
 	sb->s_d_op = &ovl_dentry_operations;
@@ -1804,6 +1777,26 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_err;
 	}
 
+	err = -ENOMEM;
+	splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
+	if (!splitlower)
+		goto out_err;
+
+	numlower = ovl_split_lowerdirs(splitlower);
+	if (numlower > OVL_MAX_STACK) {
+		pr_err("too many lower directories, limit is %d\n",
+		       OVL_MAX_STACK);
+		goto out_err;
+	}
+
+	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
+	if (!layers)
+		goto out_err;
+
+	ofs->layers = layers;
+	/* Layer 0 is reserved for upper even if there's no upper */
+	ofs->numlayer = 1;
+
 	sb->s_stack_depth = 0;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	atomic_long_set(&ofs->last_ino, 1);
@@ -1825,7 +1818,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			goto out_err;
 		}
 
-		err = ovl_get_upper(sb, ofs, &upperpath);
+		err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
 		if (err)
 			goto out_err;
 
@@ -1840,7 +1833,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
 
 	}
-	oe = ovl_get_lowerstack(sb, ofs);
+	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
 	err = PTR_ERR(oe);
 	if (IS_ERR(oe))
 		goto out_err;
@@ -1903,6 +1896,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_free_oe;
 
 	mntput(upperpath.mnt);
+	kfree(splitlower);
 
 	sb->s_root = root_dentry;
 
@@ -1912,6 +1906,7 @@ out_free_oe:
 	ovl_entry_stack_free(oe);
 	kfree(oe);
 out_err:
+	kfree(splitlower);
 	path_put(&upperpath);
 	ovl_free_fs(ofs);
 out:
-- 
cgit 


From df820f8de4e481222b17f9bcee7b909ae8167529 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 4 Jun 2020 10:48:19 +0200
Subject: ovl: make private mounts longterm

Overlayfs is using clone_private_mount() to create internal mounts for
underlying layers.  These are used for operations requiring a path, such as
dentry_open().

Since these private mounts are not in any namespace they are treated as
short term, "detached" mounts and mntput() involves taking the global
mount_lock, which can result in serious cacheline pingpong.

Make these private mounts longterm instead, which trade the penalty on
mntput() for a slightly longer shutdown time due to an added RCU grace
period when putting these mounts.

Introduce a new helper kern_unmount_many() that can take care of multiple
longterm mounts with a single RCU grace period.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/porting.rst |  7 +++++++
 fs/namespace.c                        | 16 ++++++++++++++++
 fs/overlayfs/super.c                  |  7 ++++++-
 include/linux/mount.h                 |  2 ++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 26c093969573..867036aa90b8 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -858,3 +858,10 @@ be misspelled d_alloc_anon().
 [should've been added in 2016] stale comment in finish_open() nonwithstanding,
 failure exits in ->atomic_open() instances should *NOT* fput() the file,
 no matter what.  Everything is handled by the caller.
+
+---
+
+**mandatory**
+
+clone_private_mount() returns a longterm mount now, so the proper destructor of
+its result is kern_unmount() or kern_unmount_array().
diff --git a/fs/namespace.c b/fs/namespace.c
index a28e4db075ed..d53517f1d741 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path)
 	if (IS_ERR(new_mnt))
 		return ERR_CAST(new_mnt);
 
+	/* Longterm mount to be removed by kern_unmount*() */
+	new_mnt->mnt_ns = MNT_NS_INTERNAL;
+
 	return &new_mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(kern_unmount);
 
+void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		if (mnt[i])
+			real_mount(mnt[i])->mnt_ns = NULL;
+	synchronize_rcu_expedited();
+	for (i = 0; i < num; i++)
+		mntput(mnt[i]);
+}
+EXPORT_SYMBOL(kern_unmount_array);
+
 bool our_mnt(struct vfsmount *mnt)
 {
 	return check_mnt(real_mount(mnt));
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index eb81d8760a6a..8d8cd46e1482 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)
 
 static void ovl_free_fs(struct ovl_fs *ofs)
 {
+	struct vfsmount **mounts;
 	unsigned i;
 
 	iput(ofs->workbasedir_trap);
@@ -224,10 +225,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	dput(ofs->workbasedir);
 	if (ofs->upperdir_locked)
 		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+
+	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
+	mounts = (struct vfsmount **) ofs->layers;
 	for (i = 0; i < ofs->numlayer; i++) {
 		iput(ofs->layers[i].trap);
-		mntput(ofs->layers[i].mnt);
+		mounts[i] = ofs->layers[i].mnt;
 	}
+	kern_unmount_array(mounts, ofs->numlayer);
 	kfree(ofs->layers);
 	for (i = 0; i < ofs->numfs; i++)
 		free_anon_bdev(ofs->fs[i].pseudo_dev);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index bf8cc4108b8f..8de95a0bec8d 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max;
 
 extern bool path_is_mountpoint(const struct path *path);
 
+extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
+
 #endif /* _LINUX_MOUNT_H */
-- 
cgit 


From b778e1ee1afe2784c2a034bb1e0192423097cb36 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 4 Jun 2020 10:48:19 +0200
Subject: ovl: only pass ->ki_flags to ovl_iocb_to_rwf()

Next patch will want to pass a modified set of flags, so...

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/file.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0f83c8dfec46..01820e654a21 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -232,9 +232,8 @@ static void ovl_file_accessed(struct file *file)
 	touch_atime(&file->f_path);
 }
 
-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+static rwf_t ovl_iocb_to_rwf(int ifl)
 {
-	int ifl = iocb->ki_flags;
 	rwf_t flags = 0;
 
 	if (ifl & IOCB_NOWAIT)
@@ -296,7 +295,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	if (is_sync_kiocb(iocb)) {
 		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
-				    ovl_iocb_to_rwf(iocb));
+				    ovl_iocb_to_rwf(iocb->ki_flags));
 	} else {
 		struct ovl_aio_req *aio_req;
 
@@ -349,7 +348,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (is_sync_kiocb(iocb)) {
 		file_start_write(real.file);
 		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
-				     ovl_iocb_to_rwf(iocb));
+				     ovl_iocb_to_rwf(iocb->ki_flags));
 		file_end_write(real.file);
 		/* Update size */
 		ovl_copyattr(ovl_inode_real(inode), inode);
-- 
cgit 


From 74c6e384e991c5305754e3c79edf768a62b00563 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 4 Jun 2020 10:48:19 +0200
Subject: ovl: make oip->index bool

ovl_get_inode() uses oip->index as a bool value, not as a pointer.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     | 2 +-
 fs/overlayfs/overlayfs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 5148c63bbf4e..0424ee11210f 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -908,7 +908,7 @@ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
  * Does overlay inode need to be hashed by lower inode?
  */
 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
-			     struct dentry *lower, struct dentry *index)
+			     struct dentry *lower, bool index)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index ffbb57b2d7f6..b725c7f15ff4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -424,7 +424,7 @@ struct ovl_inode_params {
 	struct inode *newinode;
 	struct dentry *upperdentry;
 	struct ovl_path *lowerpath;
-	struct dentry *index;
+	bool index;
 	unsigned int numlower;
 	char *redirect;
 	struct dentry *lowerdata;
-- 
cgit 


From 2068cf7dfbc69c4097c95af3a0bd943ced155a76 Mon Sep 17 00:00:00 2001
From: youngjun <her0gyugyu@gmail.com>
Date: Sun, 7 Jun 2020 02:04:06 -0700
Subject: ovl: remove unnecessary lock check

Directory is always locked until "out_unlock" label.  So lock check is not
needed.

Signed-off-by: youngjun <her0gyugyu@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 8d8cd46e1482..91476bc422f9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -676,11 +676,8 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 	struct dentry *work;
 	int err;
 	bool retried = false;
-	bool locked = false;
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
-	locked = true;
-
 retry:
 	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
 
@@ -741,9 +738,7 @@ retry:
 		goto out_err;
 	}
 out_unlock:
-	if (locked)
-		inode_unlock(dir);
-
+	inode_unlock(dir);
 	return work;
 
 out_dput:
-- 
cgit