From c2dfd2276cec63a0c6f6ce18ed83800d96fde542 Mon Sep 17 00:00:00 2001
From: Jaedon Shin <jaedon.shin@gmail.com>
Date: Fri, 1 Dec 2017 07:31:29 -0500
Subject: media: dvb_frontend: Add compat_ioctl callback

Adds compat_ioctl for 32-bit user space applications on a 64-bit system.

[m.chehab@osg.samsung.com: add missing include compat.h]
Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 fs/compat_ioctl.c | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5fc5dc660600..9a1fe60cce9a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1218,23 +1218,6 @@ COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
 COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
 COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
 COMPATIBLE_IOCTL(DMX_GET_STC)
-COMPATIBLE_IOCTL(FE_GET_INFO)
-COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD)
-COMPATIBLE_IOCTL(FE_DISEQC_SEND_MASTER_CMD)
-COMPATIBLE_IOCTL(FE_DISEQC_RECV_SLAVE_REPLY)
-COMPATIBLE_IOCTL(FE_DISEQC_SEND_BURST)
-COMPATIBLE_IOCTL(FE_SET_TONE)
-COMPATIBLE_IOCTL(FE_SET_VOLTAGE)
-COMPATIBLE_IOCTL(FE_ENABLE_HIGH_LNB_VOLTAGE)
-COMPATIBLE_IOCTL(FE_READ_STATUS)
-COMPATIBLE_IOCTL(FE_READ_BER)
-COMPATIBLE_IOCTL(FE_READ_SIGNAL_STRENGTH)
-COMPATIBLE_IOCTL(FE_READ_SNR)
-COMPATIBLE_IOCTL(FE_READ_UNCORRECTED_BLOCKS)
-COMPATIBLE_IOCTL(FE_SET_FRONTEND)
-COMPATIBLE_IOCTL(FE_GET_FRONTEND)
-COMPATIBLE_IOCTL(FE_GET_EVENT)
-COMPATIBLE_IOCTL(FE_DISHNETWORK_SEND_LEGACY_CMD)
 COMPATIBLE_IOCTL(VIDEO_STOP)
 COMPATIBLE_IOCTL(VIDEO_PLAY)
 COMPATIBLE_IOCTL(VIDEO_FREEZE)
-- 
cgit 


From d5bd821350e69f5f464c175455135727f986f793 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 19 Dec 2017 08:11:01 +0100
Subject: udf: Sanitize nanoseconds for time stamps

Reportedly some UDF filesystems are recorded with bogus subsecond values
resulting in nanoseconds being over 10^9. Sanitize nanoseconds in time
stamps when loading them from disk.

Reported-by: Ian Turner <vectro@vectro.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/udftime.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 14626b34d13e..0927a4b2ecaf 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -62,6 +62,11 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 	dest->tv_sec -= offset * 60;
 	dest->tv_nsec = 1000 * (src.centiseconds * 10000 +
 			src.hundredsOfMicroseconds * 100 + src.microseconds);
+	/*
+	 * Sanitize nanosecond field since reportedly some filesystems are
+	 * recorded with bogus sub-second values.
+	 */
+	dest->tv_nsec %= NSEC_PER_SEC;
 	return dest;
 }
 
-- 
cgit 


From 651d66660589b1a1773667f35ffb6a850cd2e834 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Thu, 21 Dec 2017 11:00:35 -0500
Subject: fs: compat_ioctl: add new DVB demux ioctls

Use trivial handling for the new DVB demux ioctls, as none
of them passes a pointer inside their structures.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 fs/compat_ioctl.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9a1fe60cce9a..ef80085ed564 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1218,6 +1218,11 @@ COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
 COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
 COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
 COMPATIBLE_IOCTL(DMX_GET_STC)
+COMPATIBLE_IOCTL(DMX_REQBUFS)
+COMPATIBLE_IOCTL(DMX_QUERYBUF)
+COMPATIBLE_IOCTL(DMX_EXPBUF)
+COMPATIBLE_IOCTL(DMX_QBUF)
+COMPATIBLE_IOCTL(DMX_DQBUF)
 COMPATIBLE_IOCTL(VIDEO_STOP)
 COMPATIBLE_IOCTL(VIDEO_PLAY)
 COMPATIBLE_IOCTL(VIDEO_FREEZE)
-- 
cgit 


From f463589a7cd2d156e3b6b64ee26588c0cdcce08a Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 27 Dec 2017 15:51:37 +0100
Subject: ext2: drop unneeded newline

ext2_msg prints a newline at the end of the message string, so the message
string does not need to include a newline explicitly.  Done using
Coccinelle.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7646818ab266..15f90f5f3e13 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1225,7 +1225,7 @@ static void ext2_clear_super_error(struct super_block *sb)
 		 * write and hope for the best.
 		 */
 		ext2_msg(sb, KERN_ERR,
-		       "previous I/O error to superblock detected\n");
+		       "previous I/O error to superblock detected");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
-- 
cgit 


From 31747eda41ef3c30c09c5c096b380bf54013746a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 14 Jan 2018 18:35:40 +0200
Subject: ovl: hash directory inodes for fsnotify

fsnotify pins a watched directory inode in cache, but if directory dentry
is released, new lookup will allocate a new dentry and a new inode.
Directory events will be notified on the new inode, while fsnotify listener
is watching the old pinned inode.

Hash all directory inodes to reuse the pinned inode on lookup. Pure upper
dirs are hashed by real upper inode, merge and lower dirs are hashed by
real lower inode.

The reference to lower inode was being held by the lower dentry object
in the overlay dentry (oe->lowerstack[0]). Releasing the overlay dentry
may drop lower inode refcount to zero. Add a refcount on behalf of the
overlay inode to prevent that.

As a by-product, hashing directory inodes also detects multiple
redirected dirs to the same lower dir, as well as an uncovered redirected
dir target, and returns -ESTALE on lookup.

The reported issue dates back to initial version of overlayfs, but this
patch depends on ovl_inode code that was introduced in kernel v4.13.

Cc: <stable@vger.kernel.org> #v4.13
Reported-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Niklas Cassel <niklas.cassel@axis.com>
---
 fs/overlayfs/inode.c | 39 ++++++++++++++++++++++++++++-----------
 fs/overlayfs/super.c |  1 +
 fs/overlayfs/util.c  |  4 ++--
 3 files changed, 31 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 00b6b294272a..94d2f8a8b779 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -606,6 +606,16 @@ static int ovl_inode_set(struct inode *inode, void *data)
 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
 			     struct dentry *upperdentry)
 {
+	if (S_ISDIR(inode->i_mode)) {
+		/* Real lower dir moved to upper layer under us? */
+		if (!lowerdentry && ovl_inode_lower(inode))
+			return false;
+
+		/* Lookup of an uncovered redirect origin? */
+		if (!upperdentry && ovl_inode_upper(inode))
+			return false;
+	}
+
 	/*
 	 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
 	 * This happens when finding a copied up overlay inode for a renamed
@@ -633,6 +643,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 	struct inode *inode;
 	/* Already indexed or could be indexed on copy up? */
 	bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry));
+	struct dentry *origin = indexed ? lowerdentry : NULL;
+	bool is_dir;
 
 	if (WARN_ON(upperdentry && indexed && !lowerdentry))
 		return ERR_PTR(-EIO);
@@ -641,15 +653,19 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 		realinode = d_inode(lowerdentry);
 
 	/*
-	 * Copy up origin (lower) may exist for non-indexed upper, but we must
-	 * not use lower as hash key in that case.
-	 * Hash inodes that are or could be indexed by origin inode and
-	 * non-indexed upper inodes that could be hard linked by upper inode.
+	 * Copy up origin (lower) may exist for non-indexed non-dir upper, but
+	 * we must not use lower as hash key in that case.
+	 * Hash non-dir that is or could be indexed by origin inode.
+	 * Hash dir that is or could be merged by origin inode.
+	 * Hash pure upper and non-indexed non-dir by upper inode.
 	 */
-	if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) {
-		struct inode *key = d_inode(indexed ? lowerdentry :
-						      upperdentry);
-		unsigned int nlink;
+	is_dir = S_ISDIR(realinode->i_mode);
+	if (is_dir)
+		origin = lowerdentry;
+
+	if (upperdentry || origin) {
+		struct inode *key = d_inode(origin ?: upperdentry);
+		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
 
 		inode = iget5_locked(dentry->d_sb, (unsigned long) key,
 				     ovl_inode_test, ovl_inode_set, key);
@@ -670,8 +686,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 			goto out;
 		}
 
-		nlink = ovl_get_nlink(lowerdentry, upperdentry,
-				      realinode->i_nlink);
+		/* Recalculate nlink for non-dir due to indexing */
+		if (!is_dir)
+			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
 		set_nlink(inode, nlink);
 	} else {
 		inode = new_inode(dentry->d_sb);
@@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 		ovl_set_flag(OVL_IMPURE, inode);
 
 	/* Check for non-merge dir that may have whiteouts */
-	if (S_ISDIR(realinode->i_mode)) {
+	if (is_dir) {
 		struct ovl_entry *oe = dentry->d_fsdata;
 
 		if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 76440feb79f6..1a436fa92a04 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)
 	struct ovl_inode *oi = OVL_I(inode);
 
 	dput(oi->__upperdentry);
+	iput(oi->lower);
 	kfree(oi->redirect);
 	ovl_dir_cache_free(inode);
 	mutex_destroy(&oi->lock);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index d6bb1c9f5e7a..06119f34a69d 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -257,7 +257,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
 	if (upperdentry)
 		OVL_I(inode)->__upperdentry = upperdentry;
 	if (lowerdentry)
-		OVL_I(inode)->lower = d_inode(lowerdentry);
+		OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
 
 	ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
 }
@@ -273,7 +273,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 	 */
 	smp_wmb();
 	OVL_I(inode)->__upperdentry = upperdentry;
-	if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
+	if (inode_unhashed(inode)) {
 		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
 	}
-- 
cgit 


From d796e77f1dd541fe34481af2eee6454688d13982 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 8 Nov 2017 09:39:46 +0200
Subject: ovl: fix failure to fsync lower dir
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a writable mount, it is not expected for overlayfs to return
EINVAL/EROFS for fsync, even if dir/file is not changed.

This commit fixes the case of fsync of directory, which is easier to
address, because overlayfs already implements fsync file operation for
directories.

The problem reported by Raphael is that the new PostgreSQL 10.0, with a
database in overlayfs where the lower layer is squashfs, fails to start.
The failure is due to an fsync error, when PostgreSQL does fsync on all
existing db directories on startup and a specific directory exists in
the lower layer with no changes.

Reported-by: Raphael Hertzog <raphael@ouaza.com>
Cc: <stable@vger.kernel.org> # v3.18
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Tested-by: Raphaël Hertzog <hertzog@debian.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/readdir.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 8c98578d27a1..a7e45e6cd732 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -769,10 +769,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 	struct dentry *dentry = file->f_path.dentry;
 	struct file *realfile = od->realfile;
 
+	/* Nothing to sync for lower */
+	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
+		return 0;
+
 	/*
 	 * Need to check if we started out being a lower dir, but got copied up
 	 */
-	if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
+	if (!od->is_upper) {
 		struct inode *inode = file_inode(file);
 
 		realfile = READ_ONCE(od->upperfile);
-- 
cgit 


From 6d0a8a90a5bbfd6befcb512fad6618608e8c0e86 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 10 Nov 2017 13:18:07 +0200
Subject: ovl: take lower dir inode mutex outside upper sb_writers lock

The functions ovl_lower_positive() and ovl_check_empty_dir() both take
inode mutex on the real lower dir under ovl_want_write() which takes
the upper_mnt sb_writers lock.

While this is not a clear locking order or layering violation, it creates
an undesired lock dependency between two unrelated layers for no good
reason.

This lock dependency materializes to a false(?) positive lockdep warning
when calling rmdir() on a nested overlayfs, where the nested and the
underlying overlayfs both use the same fs type as upper layer.

rmdir() on the nested overlayfs creates the lock chain:
  sb_writers of upper_mnt (e.g. tmpfs) in ovl_do_remove()
  ovl_i_mutex_dir_key[] of lower overlay dir in ovl_lower_positive()

rmdir() on the underlying overlayfs creates the lock chain in
reverse order:
  ovl_i_mutex_dir_key[] of lower overlay dir in vfs_rmdir()
  sb_writers of nested upper_mnt (e.g. tmpfs) in ovl_do_remove()

To get rid of the unneeded locking dependency, move both ovl_lower_positive()
and ovl_check_empty_dir() to before ovl_want_write() in rmdir() and
rename() implementation.

This change spreads the pieces of ovl_check_empty_and_clear() directly
inside the rmdir()/rename() implementations so the helper is no longer
needed and is removed.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c     | 117 ++++++++++++++++++++++---------------------------
 fs/overlayfs/namei.c   |   3 ++
 fs/overlayfs/readdir.c |   3 ++
 3 files changed, 58 insertions(+), 65 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f9788bc116a8..a1a7606d4891 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -181,11 +181,6 @@ static bool ovl_type_origin(struct dentry *dentry)
 	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
 }
 
-static bool ovl_may_have_whiteouts(struct dentry *dentry)
-{
-	return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
-}
-
 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 			    struct cattr *attr, struct dentry *hardlink)
 {
@@ -301,37 +296,6 @@ out:
 	return ERR_PTR(err);
 }
 
-static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
-{
-	int err;
-	struct dentry *ret = NULL;
-	LIST_HEAD(list);
-
-	err = ovl_check_empty_dir(dentry, &list);
-	if (err) {
-		ret = ERR_PTR(err);
-		goto out_free;
-	}
-
-	/*
-	 * When removing an empty opaque directory, then it makes no sense to
-	 * replace it with an exact replica of itself.
-	 *
-	 * If upperdentry has whiteouts, clear them.
-	 *
-	 * Can race with copy-up, since we don't hold the upperdir mutex.
-	 * Doesn't matter, since copy-up can't create a non-empty directory
-	 * from an empty one.
-	 */
-	if (!list_empty(&list))
-		ret = ovl_clear_empty(dentry, &list);
-
-out_free:
-	ovl_cache_free(&list);
-
-	return ret;
-}
-
 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
 			     const struct posix_acl *acl)
 {
@@ -623,7 +587,8 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
 }
 
-static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
+static int ovl_remove_and_whiteout(struct dentry *dentry,
+				   struct list_head *list)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct inode *wdir = workdir->d_inode;
@@ -638,8 +603,8 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
 	if (WARN_ON(!workdir))
 		return -EROFS;
 
-	if (is_dir) {
-		opaquedir = ovl_check_empty_and_clear(dentry);
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
 		err = PTR_ERR(opaquedir);
 		if (IS_ERR(opaquedir))
 			goto out;
@@ -694,7 +659,8 @@ kill_whiteout:
 	goto out_d_drop;
 }
 
-static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
+			    struct list_head *list)
 {
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct inode *dir = upperdir->d_inode;
@@ -702,10 +668,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 	struct dentry *opaquedir = NULL;
 	int err;
 
-	/* Redirect/origin dir can be !ovl_lower_positive && not clean */
-	if (is_dir && (ovl_dentry_get_redirect(dentry) ||
-		       ovl_may_have_whiteouts(dentry))) {
-		opaquedir = ovl_check_empty_and_clear(dentry);
+	if (!list_empty(list)) {
+		opaquedir = ovl_clear_empty(dentry, list);
 		err = PTR_ERR(opaquedir);
 		if (IS_ERR(opaquedir))
 			goto out;
@@ -746,11 +710,26 @@ out:
 	return err;
 }
 
+static bool ovl_pure_upper(struct dentry *dentry)
+{
+	return !ovl_dentry_lower(dentry) &&
+	       !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 {
 	int err;
 	bool locked = false;
 	const struct cred *old_cred;
+	bool lower_positive = ovl_lower_positive(dentry);
+	LIST_HEAD(list);
+
+	/* No need to clean pure upper removed by vfs_rmdir() */
+	if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
+		err = ovl_check_empty_dir(dentry, &list);
+		if (err)
+			goto out;
+	}
 
 	err = ovl_want_write(dentry);
 	if (err)
@@ -765,10 +744,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 		goto out_drop_write;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
-	if (!ovl_lower_positive(dentry))
-		err = ovl_remove_upper(dentry, is_dir);
+	if (!lower_positive)
+		err = ovl_remove_upper(dentry, is_dir, &list);
 	else
-		err = ovl_remove_and_whiteout(dentry, is_dir);
+		err = ovl_remove_and_whiteout(dentry, &list);
 	revert_creds(old_cred);
 	if (!err) {
 		if (is_dir)
@@ -780,6 +759,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 out_drop_write:
 	ovl_drop_write(dentry);
 out:
+	ovl_cache_free(&list);
 	return err;
 }
 
@@ -915,6 +895,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	bool samedir = olddir == newdir;
 	struct dentry *opaquedir = NULL;
 	const struct cred *old_cred = NULL;
+	LIST_HEAD(list);
 
 	err = -EINVAL;
 	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
@@ -929,6 +910,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	if (!overwrite && !ovl_can_move(new))
 		goto out;
 
+	if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
+		err = ovl_check_empty_dir(new, &list);
+		if (err)
+			goto out;
+	}
+
+	if (overwrite) {
+		if (ovl_lower_positive(old)) {
+			if (!ovl_dentry_is_whiteout(new)) {
+				/* Whiteout source */
+				flags |= RENAME_WHITEOUT;
+			} else {
+				/* Switch whiteouts */
+				flags |= RENAME_EXCHANGE;
+			}
+		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
+			flags |= RENAME_EXCHANGE;
+			cleanup_whiteout = true;
+		}
+	}
+
 	err = ovl_want_write(old);
 	if (err)
 		goto out;
@@ -952,9 +954,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 
 	old_cred = ovl_override_creds(old->d_sb);
 
-	if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) ||
-					ovl_may_have_whiteouts(new))) {
-		opaquedir = ovl_check_empty_and_clear(new);
+	if (!list_empty(&list)) {
+		opaquedir = ovl_clear_empty(new, &list);
 		err = PTR_ERR(opaquedir);
 		if (IS_ERR(opaquedir)) {
 			opaquedir = NULL;
@@ -962,21 +963,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		}
 	}
 
-	if (overwrite) {
-		if (ovl_lower_positive(old)) {
-			if (!ovl_dentry_is_whiteout(new)) {
-				/* Whiteout source */
-				flags |= RENAME_WHITEOUT;
-			} else {
-				/* Switch whiteouts */
-				flags |= RENAME_EXCHANGE;
-			}
-		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
-			flags |= RENAME_EXCHANGE;
-			cleanup_whiteout = true;
-		}
-	}
-
 	old_upperdir = ovl_dentry_upper(old->d_parent);
 	new_upperdir = ovl_dentry_upper(new->d_parent);
 
@@ -1094,6 +1080,7 @@ out_drop_write:
 	ovl_drop_write(old);
 out:
 	dput(opaquedir);
+	ovl_cache_free(&list);
 	return err;
 }
 
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index beb945e1963c..926248e1de04 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -774,6 +774,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 	struct ovl_entry *oe = dentry->d_fsdata;
 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
 	const struct qstr *name = &dentry->d_name;
+	const struct cred *old_cred;
 	unsigned int i;
 	bool positive = false;
 	bool done = false;
@@ -789,6 +790,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 	if (!ovl_dentry_upper(dentry))
 		return true;
 
+	old_cred = ovl_override_creds(dentry->d_sb);
 	/* Positive upper -> have to look up lower to see whether it exists */
 	for (i = 0; !done && !positive && i < poe->numlower; i++) {
 		struct dentry *this;
@@ -818,6 +820,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 			dput(this);
 		}
 	}
+	revert_creds(old_cred);
 
 	return positive;
 }
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index a7e45e6cd732..7dfe381c2cd8 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -862,8 +862,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
 	int err;
 	struct ovl_cache_entry *p, *n;
 	struct rb_root root = RB_ROOT;
+	const struct cred *old_cred;
 
+	old_cred = ovl_override_creds(dentry->d_sb);
 	err = ovl_dir_read_merged(dentry, list, &root);
+	revert_creds(old_cred);
 	if (err)
 		return err;
 
-- 
cgit 


From f81678173ce25a1c7e1570a328dfba50b5d872eb Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 18 Dec 2017 14:25:56 +0200
Subject: ovl: fix another overlay: warning prefix

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 926248e1de04..69a43ede0a2a 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -693,7 +693,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		 */
 		err = -EPERM;
 		if (d.redirect && !ofs->config.redirect_follow) {
-			pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
+			pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+					    dentry);
 			goto out_put;
 		}
 
-- 
cgit 


From 2ba9d57e65044859f7ff133bcb0a902769bf3bc6 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 3 Jan 2018 18:54:41 +0200
Subject: ovl: take mnt_want_write() for work/index dir setup

There are several write operations on upper fs not covered by
mnt_want_write():

- test set/remove OPAQUE xattr
- test create O_TMPFILE
- set ORIGIN xattr in ovl_verify_origin()
- cleanup of index entries in ovl_indexdir_cleanup()

Some of these go way back, but this patch only applies over the
v4.14 re-factoring of ovl_fill_super().

Cc: <stable@vger.kernel.org> #v4.14
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1a436fa92a04..3387e6d639a5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -521,10 +521,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 	bool retried = false;
 	bool locked = false;
 
-	err = mnt_want_write(mnt);
-	if (err)
-		goto out_err;
-
 	inode_lock_nested(dir, I_MUTEX_PARENT);
 	locked = true;
 
@@ -589,7 +585,6 @@ retry:
 		goto out_err;
 	}
 out_unlock:
-	mnt_drop_write(mnt);
 	if (locked)
 		inode_unlock(dir);
 
@@ -930,12 +925,17 @@ out:
 
 static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 {
+	struct vfsmount *mnt = ofs->upper_mnt;
 	struct dentry *temp;
 	int err;
 
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
 	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
 	if (!ofs->workdir)
-		return 0;
+		goto out;
 
 	/*
 	 * Upper should support d_type, else whiteouts are visible.  Given
@@ -945,7 +945,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 	 */
 	err = ovl_check_d_type_supported(workpath);
 	if (err < 0)
-		return err;
+		goto out;
 
 	/*
 	 * We allowed this configuration and don't want to break users over
@@ -969,6 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 	if (err) {
 		ofs->noxattr = true;
 		pr_warn("overlayfs: upper fs does not support xattr.\n");
+		err = 0;
 	} else {
 		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
 	}
@@ -980,7 +981,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
 	}
 
-	return 0;
+out:
+	mnt_drop_write(mnt);
+	return err;
 }
 
 static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
@@ -1027,8 +1030,13 @@ out:
 static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 			    struct path *upperpath)
 {
+	struct vfsmount *mnt = ofs->upper_mnt;
 	int err;
 
+	err = mnt_want_write(mnt);
+	if (err)
+		return err;
+
 	/* Verify lower root is upper root origin */
 	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
 				false, true);
@@ -1056,6 +1064,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
 
 out:
+	mnt_drop_write(mnt);
 	return err;
 }
 
-- 
cgit 


From a5a927a7c82e28ea76599dee4019c41e372c911f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 3 Jan 2018 18:54:42 +0200
Subject: ovl: take mnt_want_write() for removing impure xattr

The optimization in ovl_cache_get_impure() that tries to remove an
unneeded "impure" xattr needs to take mnt_want_write() on upper fs.

Fixes: 4edb83bb1041 ("ovl: constant d_ino for non-merge dirs")
Cc: <stable@vger.kernel.org> #v4.14
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/readdir.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 7dfe381c2cd8..1a8c39887992 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
 		return ERR_PTR(res);
 	}
 	if (list_empty(&cache->entries)) {
-		/* Good oportunity to get rid of an unnecessary "impure" flag */
-		ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE);
+		/*
+		 * A good opportunity to get rid of an unneeded "impure" flag.
+		 * Removing the "impure" xattr is best effort.
+		 */
+		if (!ovl_want_write(dentry)) {
+			ovl_do_removexattr(ovl_dentry_upper(dentry),
+					   OVL_XATTR_IMPURE);
+			ovl_drop_write(dentry);
+		}
 		ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
 		kfree(cache);
 		return NULL;
-- 
cgit 


From 24f3478d664b1eaa6f8860d3aa521aebe51b2a62 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 21 Dec 2017 17:04:07 -0800
Subject: ext4: auto disable dax instead of failing mount

Bring the ext4 filesystem in line with xfs that only warns and continues
when the "-o dax" option is specified to mount and the backing device
does not support dax. This is in preparation for removing dax support
from devices that do not enable get_user_pages() operations on dax
mappings. In other words 'gup' support is required and configurations
that were using so called 'page-less' dax will be converted back to
using the page cache.

Removing the broken 'page-less' dax support is a pre-requisite for
removing the "EXPERIMENTAL" warning when mounting a filesystem in dax
mode.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext4/super.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7c46693a14d7..18873ea89e08 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3710,11 +3710,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if (ext4_has_feature_inline_data(sb)) {
 			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
 					" that may contain inline data");
-			goto failed_mount;
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
 		}
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext4_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+		}
 	}
 
 	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
-- 
cgit 


From b4b5798cea8f40ab61f3a2c79a26314465dd83e3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 21 Dec 2017 18:18:27 -0800
Subject: ext2: auto disable dax instead of failing mount

Bring the ext2 filesystem in line with xfs that only warns and continues
when the "-o dax" option is specified to mount and the backing device
does not support dax. This is in preparation for removing dax support
from devices that do not enable get_user_pages() operations on dax
mappings. In other words 'gup' support is required and configurations
that were using so called 'page-less' dax will be converted back to
using the page cache.

Removing the broken 'page-less' dax support is a pre-requisite for
removing the "EXPERIMENTAL" warning when mounting a filesystem in dax
mode.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext2/super.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7646818ab266..38f9222606ee 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -959,8 +959,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
 		err = bdev_dax_supported(sb, blocksize);
-		if (err)
-			goto failed_mount;
+		if (err) {
+			ext2_msg(sb, KERN_ERR,
+				"DAX unsupported by block device. Turning off DAX.");
+			sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+		}
 	}
 
 	/* If the blocksize doesn't match, re-read the thing.. */
-- 
cgit 


From 569d0365f571fa6421a5c80bc30d1b2cdab857fe Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 14 Oct 2017 11:33:32 -0700
Subject: dax: require 'struct page' by default for filesystem dax

If a dax buffer from a device that does not map pages is passed to
read(2) or write(2) as a target for direct-I/O it triggers SIGBUS. If
gdb attempts to examine the contents of a dax buffer from a device that
does not map pages it triggers SIGBUS. If fork(2) is called on a process
with a dax mapping from a device that does not map pages it triggers
SIGBUS. 'struct page' is required otherwise several kernel code paths
break in surprising ways. Disable filesystem-dax on devices that do not
map pages.

In addition to needing pfn_to_page() to be valid we also require devmap
pages.  We need this to detect dax pages in the get_user_pages_fast()
path and so that we can stop managing the VM_MIXEDMAP flag. For DAX
drivers that have not supported get_user_pages() to date we allow them
to opt-in to supporting DAX with the CONFIG_FS_DAX_LIMITED configuration
option which requires ->direct_access() to return pfn_t_special() pfns.
This leaves DAX support in brd disabled and scheduled for removal.

Note that when the initial dax support was being merged a few years back
there was concern that struct page was unsuitable for use with next
generation persistent memory devices. The theoretical concern was that
struct page access, being such a hotly used data structure in the
kernel, would lead to media wear out. While that was a reasonable
conservative starting position it has not held true in practice. We have
long since committed to using devm_memremap_pages() to support higher
order kernel functionality that needs get_user_pages() and
pfn_to_page().


Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/Kconfig | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 7aee6d699fd6..b40128bf6d1a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -58,6 +58,13 @@ config FS_DAX_PMD
 	depends on ZONE_DEVICE
 	depends on TRANSPARENT_HUGEPAGE
 
+# Selected by DAX drivers that do not expect filesystem DAX to support
+# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
+# for fork() of processes with MAP_SHARED mappings or support for
+# direct-I/O to a DAX mapping.
+config FS_DAX_LIMITED
+	bool
+
 endif # BLOCK
 
 # Posix ACL utility routines
-- 
cgit 


From 9678e630305724487f1fc101d6b83c383ff9cc90 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 3 Jan 2018 19:34:45 +0200
Subject: ovl: fix inconsistent d_ino for legacy merge dir

For a merge dir that was copied up before v4.12 or that was hand crafted
offline (e.g. mkdir {upper/lower}/dir), upper dir does not contain the
'trusted.overlay.origin' xattr.  In that case, stat(2) on the merge dir
returns the lower dir st_ino, but getdents(2) returns the upper dir d_ino.

After this change, on merge dir lookup, missing origin xattr on upper
dir will be fixed and 'impure' xattr will be fixed on parent of the legacy
merge dir.

Suggested-by: zhangyi (F) <yi.zhang@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |  4 ++--
 fs/overlayfs/namei.c     | 33 +++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |  2 ++
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eb3b8d39fb61..206ececd5ae7 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -288,8 +288,8 @@ out:
 	return fh;
 }
 
-static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-			  struct dentry *upper)
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper)
 {
 	const struct ovl_fh *fh = NULL;
 	int err;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 69a43ede0a2a..69f4f19659fc 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -584,6 +584,27 @@ static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path)
 	return i;
 }
 
+/* Fix missing 'origin' xattr */
+static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
+			  struct dentry *upper)
+{
+	int err;
+
+	if (ovl_check_origin_xattr(upper))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		return err;
+
+	err = ovl_set_origin(dentry, lower, upper);
+	if (!err)
+		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags)
 {
@@ -674,6 +695,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		if (!this)
 			continue;
 
+		/*
+		 * If no origin fh is stored in upper of a merge dir, store fh
+		 * of lower dir and set upper parent "impure".
+		 */
+		if (upperdentry && !ctr && !ofs->noxattr) {
+			err = ovl_fix_origin(dentry, this, upperdentry);
+			if (err) {
+				dput(this);
+				goto out_put;
+			}
+		}
+
 		stack[ctr].dentry = this;
 		stack[ctr].layer = lower.layer;
 		ctr++;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b489099ccd49..d1cfa69c98b5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -322,3 +322,5 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
 struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+		   struct dentry *upper);
-- 
cgit 


From a683737ba924cd2985f6e7350520f449915ff8f9 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 19 Sep 2017 12:14:18 +0300
Subject: ovl: disable index when no xattr support

Overlayfs falls back to index=off if lower/upper fs does not support
file handles. Do the same if upper fs does not support xattr.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 3387e6d639a5..f3281f0b2388 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -968,7 +968,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
 	if (err) {
 		ofs->noxattr = true;
-		pr_warn("overlayfs: upper fs does not support xattr.\n");
+		ofs->config.index = false;
+		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
 		err = 0;
 	} else {
 		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
-- 
cgit 


From 972d0093c2f7b1bd57e47a1780a552dde528fd16 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 19 Sep 2017 12:14:18 +0300
Subject: ovl: force r/o mount when index dir creation fails

When work dir creation fails, a warning is emitted and overlay is
mounted r/o. Trying to remount r/w will fail with no work dir.

When index dir creation fails, the same warning is emitted and overlay
is mounted r/o, but trying to remount r/w will succeed. This may cause
unintentional corruption of filesystem consistency.

Adjust the behavior of index dir creation failure to that of work dir
creation failure and do not allow to remount r/w. User needs to state
an explicit intention to work without an index by mounting with
option 'index=off' to allow r/w mount with no index dir.

When mounting with option 'index=on' and no 'upperdir', index is
implicitly disabled, so do not warn about no file handle support.

The issue was introduced with inodes index feature in v4.13, but this
patch will not apply cleanly before ovl_fill_super() re-factoring in
v4.15.

Fixes: 02bcd1577400 ("ovl: introduce the inodes index dir feature")
Cc: <stable@vger.kernel.org> #v4.13
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f3281f0b2388..9aa5d32af427 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -699,7 +699,8 @@ static int ovl_lower_dir(const char *name, struct path *path,
 	 * The inodes index feature needs to encode and decode file
 	 * handles, so it requires that all layers support them.
 	 */
-	if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+	if (ofs->config.index && ofs->config.upperdir &&
+	    !ovl_can_decode_fh(path->dentry->d_sb)) {
 		ofs->config.index = false;
 		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
 	}
@@ -1268,11 +1269,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (err)
 			goto out_free_oe;
 
-		if (!ofs->indexdir)
+		/* Force r/o mount with no index dir */
+		if (!ofs->indexdir) {
+			dput(ofs->workdir);
+			ofs->workdir = NULL;
 			sb->s_flags |= SB_RDONLY;
+		}
+
 	}
 
-	/* Show index=off/on in /proc/mounts for any of the reasons above */
+	/* Show index=off in /proc/mounts for forced r/o mount */
 	if (!ofs->indexdir)
 		ofs->config.index = false;
 
-- 
cgit 


From d583ed7d138825fd9469d5419e23230ad39173e8 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 8 Nov 2017 19:23:36 +0200
Subject: ovl: store layer index in ovl_layer

Store the fs root layer index inside ovl_layer struct, so we can
get the root fs layer index from merge dir lower layer instead of
finding it with the ovl_find_layer() helper.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     | 17 +----------------
 fs/overlayfs/ovl_entry.h |  2 ++
 fs/overlayfs/super.c     |  1 +
 3 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 69f4f19659fc..a38db76cbccd 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -572,18 +572,6 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 	return (idx < oe->numlower) ? idx + 1 : -1;
 }
 
-static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path)
-{
-	int i;
-
-	for (i = 0; i < ofs->numlower; i++) {
-		if (ofs->lower_layers[i].mnt == path->layer->mnt)
-			break;
-	}
-
-	return i;
-}
-
 /* Fix missing 'origin' xattr */
 static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
 			  struct dentry *upper)
@@ -733,11 +721,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
 			poe = roe;
-
 			/* Find the current layer on the root dentry */
-			i = ovl_find_layer(ofs, &lower);
-			if (WARN_ON(i == ofs->numlower))
-				break;
+			i = lower.layer->idx - 1;
 		}
 	}
 
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 9d0bc03bf6e4..608e48755070 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -22,6 +22,8 @@ struct ovl_config {
 struct ovl_layer {
 	struct vfsmount *mnt;
 	dev_t pseudo_dev;
+	/* Index of this layer in fs root (upper == 0) */
+	int idx;
 };
 
 struct ovl_path {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9aa5d32af427..b34a002ab4b5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1106,6 +1106,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
 
 		ofs->lower_layers[ofs->numlower].mnt = mnt;
 		ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
+		ofs->lower_layers[ofs->numlower].idx = i + 1;
 		ofs->numlower++;
 
 		/* Check if all lower layers are on same sb */
-- 
cgit 


From 2e1a532883cf77f01031bef4b83d864a46c1bed0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 24 Oct 2017 15:12:15 +0300
Subject: ovl: factor out ovl_check_origin_fh()

Re-factor ovl_check_origin() and ovl_get_origin(), so origin fh xattr is
read from upper inode only once during lookup with multiple lower layers
and only once when verifying index entry origin.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 142 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 92 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a38db76cbccd..a6b9bd2afca1 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -87,9 +87,36 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
 	return 1;
 }
 
+/*
+ * Check validity of an overlay file handle buffer.
+ *
+ * Return 0 for a valid file handle.
+ * Return -ENODATA for "origin unknown".
+ * Return <0 for an invalid file handle.
+ */
+static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+		return -EINVAL;
+
+	if (fh->magic != OVL_FH_MAGIC)
+		return -EINVAL;
+
+	/* Treat larger version and unknown flags as "origin unknown" */
+	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+		return -ENODATA;
+
+	/* Treat endianness mismatch as "origin unknown" */
+	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+		return -ENODATA;
+
+	return 0;
+}
+
 static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 {
-	int res;
+	int res, err;
 	struct ovl_fh *fh = NULL;
 
 	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
@@ -102,7 +129,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 	if (res == 0)
 		return NULL;
 
-	fh  = kzalloc(res, GFP_KERNEL);
+	fh = kzalloc(res, GFP_KERNEL);
 	if (!fh)
 		return ERR_PTR(-ENOMEM);
 
@@ -110,20 +137,12 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 	if (res < 0)
 		goto fail;
 
-	if (res < sizeof(struct ovl_fh) || res < fh->len)
-		goto invalid;
-
-	if (fh->magic != OVL_FH_MAGIC)
+	err = ovl_check_fh_len(fh, res);
+	if (err < 0) {
+		if (err == -ENODATA)
+			goto out;
 		goto invalid;
-
-	/* Treat larger version and unknown flags as "origin unknown" */
-	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
-		goto out;
-
-	/* Treat endianness mismatch as "origin unknown" */
-	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
-	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
-		goto out;
+	}
 
 	return fh;
 
@@ -139,22 +158,17 @@ invalid:
 	goto out;
 }
 
-static struct dentry *ovl_get_origin(struct dentry *dentry,
-				     struct vfsmount *mnt)
+static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
 {
-	struct dentry *origin = NULL;
-	struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+	struct dentry *origin;
 	int bytes;
 
-	if (IS_ERR_OR_NULL(fh))
-		return (struct dentry *)fh;
-
 	/*
 	 * Make sure that the stored uuid matches the uuid of the lower
 	 * layer where file handle will be decoded.
 	 */
 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
-		goto out;
+		return NULL;
 
 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
 	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
@@ -164,22 +178,15 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 		/* Treat stale file handle as "origin unknown" */
 		if (origin == ERR_PTR(-ESTALE))
 			origin = NULL;
-		goto out;
+		return origin;
 	}
 
-	if (ovl_dentry_weird(origin) ||
-	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
-		goto invalid;
+	if (ovl_dentry_weird(origin)) {
+		dput(origin);
+		return NULL;
+	}
 
-out:
-	kfree(fh);
 	return origin;
-
-invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
-	dput(origin);
-	origin = NULL;
-	goto out;
 }
 
 static bool ovl_is_opaquedir(struct dentry *dentry)
@@ -284,9 +291,9 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-static int ovl_check_origin(struct dentry *upperdentry,
-			    struct ovl_path *lower, unsigned int numlower,
-			    struct ovl_path **stackp, unsigned int *ctrp)
+static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry,
+			       struct ovl_path *lower, unsigned int numlower,
+			       struct ovl_path **stackp)
 {
 	struct vfsmount *mnt;
 	struct dentry *origin = NULL;
@@ -294,18 +301,20 @@ static int ovl_check_origin(struct dentry *upperdentry,
 
 	for (i = 0; i < numlower; i++) {
 		mnt = lower[i].layer->mnt;
-		origin = ovl_get_origin(upperdentry, mnt);
-		if (IS_ERR(origin))
-			return PTR_ERR(origin);
-
+		origin = ovl_decode_fh(fh, mnt);
 		if (origin)
 			break;
 	}
 
 	if (!origin)
-		return 0;
+		return -ESTALE;
+	else if (IS_ERR(origin))
+		return PTR_ERR(origin);
+
+	if (!ovl_is_whiteout(upperdentry) &&
+	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+		goto invalid;
 
-	BUG_ON(*ctrp);
 	if (!*stackp)
 		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
 	if (!*stackp) {
@@ -313,9 +322,41 @@ static int ovl_check_origin(struct dentry *upperdentry,
 		return -ENOMEM;
 	}
 	**stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer};
-	*ctrp = 1;
 
 	return 0;
+
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
+			    d_inode(origin)->i_mode & S_IFMT);
+	dput(origin);
+	return -EIO;
+}
+
+static int ovl_check_origin(struct dentry *upperdentry,
+			    struct ovl_path *lower, unsigned int numlower,
+			    struct ovl_path **stackp, unsigned int *ctrp)
+{
+	struct ovl_fh *fh = ovl_get_origin_fh(upperdentry);
+	int err;
+
+	if (IS_ERR_OR_NULL(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_check_origin_fh(fh, upperdentry, lower, numlower, stackp);
+	kfree(fh);
+
+	if (err) {
+		if (err == -ESTALE)
+			return 0;
+		return err;
+	}
+
+	if (WARN_ON(*ctrp))
+		return -EIO;
+
+	*ctrp = 1;
+	return 0;
 }
 
 /*
@@ -389,7 +430,6 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
 	size_t len;
 	struct ovl_path origin = { };
 	struct ovl_path *stack = &origin;
-	unsigned int ctr = 0;
 	int err;
 
 	if (!d_inode(index))
@@ -420,16 +460,18 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
 		goto fail;
 
 	err = -EINVAL;
-	if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+	if (hex2bin((u8 *)fh, index->d_name.name, len))
+		goto fail;
+
+	err = ovl_check_fh_len(fh, len);
+	if (err)
 		goto fail;
 
 	err = ovl_verify_origin_fh(index, fh);
 	if (err)
 		goto fail;
 
-	err = ovl_check_origin(index, lower, numlower, &stack, &ctr);
-	if (!err && !ctr)
-		err = -ESTALE;
+	err = ovl_check_origin_fh(fh, index, lower, numlower, &stack);
 	if (err)
 		goto fail;
 
-- 
cgit 


From 1eff1a1deec727bacead79ec64554c1df190f43c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 12 Dec 2017 22:40:46 +0200
Subject: ovl: simplify arguments to ovl_check_origin_fh()

Pass the fs instance with lower_layers array instead of the dentry
lowerstack array to ovl_check_origin_fh(), because the dentry members
of lowerstack play no role in this helper.

This change simplifies the argument list of ovl_check_origin(),
ovl_cleanup_index() and ovl_verify_index().

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     | 28 +++++++++++++---------------
 fs/overlayfs/overlayfs.h |  9 ++++-----
 fs/overlayfs/readdir.c   | 12 ++++++------
 fs/overlayfs/super.c     |  5 +----
 4 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a6b9bd2afca1..27f25a61f6e4 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -291,17 +291,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry,
-			       struct ovl_path *lower, unsigned int numlower,
+static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+			       struct dentry *upperdentry,
 			       struct ovl_path **stackp)
 {
-	struct vfsmount *mnt;
 	struct dentry *origin = NULL;
 	int i;
 
-	for (i = 0; i < numlower; i++) {
-		mnt = lower[i].layer->mnt;
-		origin = ovl_decode_fh(fh, mnt);
+	for (i = 0; i < ofs->numlower; i++) {
+		origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt);
 		if (origin)
 			break;
 	}
@@ -321,7 +319,10 @@ static int ovl_check_origin_fh(struct ovl_fh *fh, struct dentry *upperdentry,
 		dput(origin);
 		return -ENOMEM;
 	}
-	**stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer};
+	**stackp = (struct ovl_path){
+		.dentry = origin,
+		.layer = &ofs->lower_layers[i]
+	};
 
 	return 0;
 
@@ -333,8 +334,7 @@ invalid:
 	return -EIO;
 }
 
-static int ovl_check_origin(struct dentry *upperdentry,
-			    struct ovl_path *lower, unsigned int numlower,
+static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 			    struct ovl_path **stackp, unsigned int *ctrp)
 {
 	struct ovl_fh *fh = ovl_get_origin_fh(upperdentry);
@@ -343,7 +343,7 @@ static int ovl_check_origin(struct dentry *upperdentry,
 	if (IS_ERR_OR_NULL(fh))
 		return PTR_ERR(fh);
 
-	err = ovl_check_origin_fh(fh, upperdentry, lower, numlower, stackp);
+	err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp);
 	kfree(fh);
 
 	if (err) {
@@ -423,8 +423,7 @@ fail:
  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
  * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
  */
-int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
-		     unsigned int numlower)
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 {
 	struct ovl_fh *fh = NULL;
 	size_t len;
@@ -471,7 +470,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
 	if (err)
 		goto fail;
 
-	err = ovl_check_origin_fh(fh, index, lower, numlower, &stack);
+	err = ovl_check_origin_fh(ofs, fh, index, &stack);
 	if (err)
 		goto fail;
 
@@ -689,8 +688,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			 * number - it's the same as if we held a reference
 			 * to a dentry in lower layer that was moved under us.
 			 */
-			err = ovl_check_origin(upperdentry, roe->lowerstack,
-					       roe->numlower, &stack, &ctr);
+			err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
 			if (err)
 				goto out_put_upper;
 		}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d1cfa69c98b5..d55afb6646b0 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -251,11 +251,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 /* namei.c */
 int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
 		      bool is_upper, bool set);
-int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
-		     unsigned int numlower);
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
-struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
 
 /* readdir.c */
@@ -267,8 +267,7 @@ void ovl_dir_cache_free(struct inode *inode);
 int ovl_check_d_type_supported(struct path *realpath);
 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 			 struct dentry *dentry, int level);
-int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
-			 struct ovl_path *lower, unsigned int numlower);
+int ovl_indexdir_cleanup(struct ovl_fs *ofs);
 
 /* inode.c */
 int ovl_set_nlink_upper(struct dentry *dentry);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 1a8c39887992..4c660c7085b7 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1030,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 	}
 }
 
-int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
-			 struct ovl_path *lower, unsigned int numlower)
+int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 {
 	int err;
+	struct dentry *indexdir = ofs->indexdir;
 	struct dentry *index = NULL;
-	struct inode *dir = dentry->d_inode;
-	struct path path = { .mnt = mnt, .dentry = dentry };
+	struct inode *dir = indexdir->d_inode;
+	struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
 	LIST_HEAD(list);
 	struct rb_root root = RB_ROOT;
 	struct ovl_cache_entry *p;
@@ -1060,13 +1060,13 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
 			if (p->len == 2 && p->name[1] == '.')
 				continue;
 		}
-		index = lookup_one_len(p->name, dentry, p->len);
+		index = lookup_one_len(p->name, indexdir, p->len);
 		if (IS_ERR(index)) {
 			err = PTR_ERR(index);
 			index = NULL;
 			break;
 		}
-		err = ovl_verify_index(index, lower, numlower);
+		err = ovl_verify_index(ofs, index);
 		/* Cleanup stale and orphan index entries */
 		if (err && (err == -ESTALE || err == -ENOENT))
 			err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b34a002ab4b5..4345c3f83fde 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1057,10 +1057,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 
 		/* Cleanup bad/stale/orphan index entries */
 		if (!err)
-			err = ovl_indexdir_cleanup(ofs->indexdir,
-						   ofs->upper_mnt,
-						   oe->lowerstack,
-						   oe->numlower);
+			err = ovl_indexdir_cleanup(ofs);
 	}
 	if (err || !ofs->indexdir)
 		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
-- 
cgit 


From 051224438af21047b34160b1e0ad1c5af45fdace Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 08:25:32 +0200
Subject: ovl: generalize ovl_verify_origin() and helpers

Remove the "origin" language from the functions that handle set, get
and verify of "origin" xattr and pass the xattr name as an argument.

The same helpers are going to be used for NFS export to set, get and
verify the "upper" xattr for directory index entries.

ovl_verify_origin() is now a helper used only to verify non upper
file handle stored in "origin" xattr of upper inode.

The upper root dir file handle is still stored in "origin" xattr on
the index dir for backward compatibility. This is going to be changed
by the patch that adds directory index entries support.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |  6 +++---
 fs/overlayfs/namei.c     | 42 ++++++++++++++++++++++--------------------
 fs/overlayfs/overlayfs.h | 12 +++++++++---
 fs/overlayfs/super.c     |  8 ++++----
 4 files changed, 38 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 206ececd5ae7..503c92404095 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
+struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
 	void *buf;
 	int buflen = MAX_HANDLE_SZ;
-	uuid_t *uuid = &lower->d_sb->s_uuid;
+	uuid_t *uuid = &real->d_sb->s_uuid;
 
 	buf = kmalloc(buflen, GFP_KERNEL);
 	if (!buf)
@@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
 	 * the price or reconnecting the dentry.
 	 */
 	dwords = buflen >> 2;
-	fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+	fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
 	buflen = (dwords << 2);
 
 	fh = ERR_PTR(-EIO);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 27f25a61f6e4..11e164cb2593 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -114,12 +114,12 @@ static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
 	return 0;
 }
 
-static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
+static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
 {
 	int res, err;
 	struct ovl_fh *fh = NULL;
 
-	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+	res = vfs_getxattr(dentry, name, NULL, 0);
 	if (res < 0) {
 		if (res == -ENODATA || res == -EOPNOTSUPP)
 			return NULL;
@@ -133,7 +133,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 	if (!fh)
 		return ERR_PTR(-ENOMEM);
 
-	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+	res = vfs_getxattr(dentry, name, fh, res);
 	if (res < 0)
 		goto fail;
 
@@ -337,7 +337,7 @@ invalid:
 static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 			    struct ovl_path **stackp, unsigned int *ctrp)
 {
-	struct ovl_fh *fh = ovl_get_origin_fh(upperdentry);
+	struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
 	int err;
 
 	if (IS_ERR_OR_NULL(fh))
@@ -360,12 +360,13 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 }
 
 /*
- * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Verify that @fh matches the file handle stored in xattr @name.
  * Return 0 on match, -ESTALE on mismatch, < 0 on error.
  */
-static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+static int ovl_verify_fh(struct dentry *dentry, const char *name,
+			 const struct ovl_fh *fh)
 {
-	struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
+	struct ovl_fh *ofh = ovl_get_fh(dentry, name);
 	int err = 0;
 
 	if (!ofh)
@@ -382,28 +383,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
 }
 
 /*
- * Verify that an inode matches the origin file handle stored in upper inode.
+ * Verify that @real dentry matches the file handle stored in xattr @name.
  *
- * If @set is true and there is no stored file handle, encode and store origin
- * file handle in OVL_XATTR_ORIGIN.
+ * If @set is true and there is no stored file handle, encode @real and store
+ * file handle in xattr @name.
  *
- * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
  */
-int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
-		      bool is_upper, bool set)
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+		      struct dentry *real, bool is_upper, bool set)
 {
 	struct inode *inode;
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_fh(origin, is_upper);
+	fh = ovl_encode_fh(real, is_upper);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh))
 		goto fail;
 
-	err = ovl_verify_origin_fh(dentry, fh);
+	err = ovl_verify_fh(dentry, name, fh);
 	if (set && err == -ENODATA)
-		err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+		err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
 	if (err)
 		goto fail;
 
@@ -412,9 +413,10 @@ out:
 	return err;
 
 fail:
-	inode = d_inode(origin);
-	pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
-			    origin, inode ? inode->i_ino : 0, err);
+	inode = d_inode(real);
+	pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+			    is_upper ? "upper" : "origin", real,
+			    inode ? inode->i_ino : 0, err);
 	goto out;
 }
 
@@ -466,7 +468,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	if (err)
 		goto fail;
 
-	err = ovl_verify_origin_fh(index, fh);
+	err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh);
 	if (err)
 		goto fail;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d55afb6646b0..1d62b1e6111a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -249,8 +249,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 
 
 /* namei.c */
-int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
-		      bool is_upper, bool set);
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+		      struct dentry *real, bool is_upper, bool set);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -258,6 +258,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
 
+static inline int ovl_verify_origin(struct dentry *upper,
+				    struct dentry *origin, bool set)
+{
+	return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+}
+
 /* readdir.c */
 extern const struct file_operations ovl_dir_operations;
 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
@@ -320,6 +326,6 @@ int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
+struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
 int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 		   struct dentry *upper);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4345c3f83fde..4ebbb368fce8 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1041,7 +1041,7 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 
 	/* Verify lower root is upper root origin */
 	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
-				false, true);
+				true);
 	if (err) {
 		pr_err("overlayfs: failed to verify upper root origin\n");
 		goto out;
@@ -1049,9 +1049,9 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 
 	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
 	if (ofs->indexdir) {
-		/* Verify upper root is index dir origin */
-		err = ovl_verify_origin(ofs->indexdir, upperpath->dentry,
-					true, true);
+		/* Verify upper root is exclusively associated with index dir */
+		err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+					upperpath->dentry, true, true);
 		if (err)
 			pr_err("overlayfs: failed to verify index dir origin\n");
 
-- 
cgit 


From 60b866420ba7ae0b6a8d338f49be21c601d19064 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 10 Jan 2018 23:15:21 +0200
Subject: ovl: update documentation of inodes index feature

Document that inode index feature solves breaking hard links on
copy up.

Simplify Kconfig backward compatibility disclaimer.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/Kconfig | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 5ac415466861..9eac01c3e21e 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -47,9 +47,6 @@ config OVERLAY_FS_INDEX
 	  The inodes index feature prevents breaking of lower hardlinks on copy
 	  up.
 
-	  Note, that the inodes index feature is read-only backward compatible.
-	  That is, mounting an overlay which has an index dir on a kernel that
-	  doesn't support this feature read-only, will not have any negative
-	  outcomes.  However, mounting the same overlay with an old kernel
-	  read-write and then mounting it again with a new kernel, will have
-	  unexpected results.
+	  Note, that the inodes index feature is not backward compatible.
+	  That is, mounting an overlay which has an inodes index on a kernel
+	  that doesn't support this feature will have unexpected results.
-- 
cgit 


From f168f1098dd9038daaf9f7be5f81cdea4985886a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 19 Jan 2018 11:26:53 +0200
Subject: ovl: add support for "nfs_export" configuration

Introduce the "nfs_export" config, module and mount options.

The NFS export feature depends on the "index" feature and enables two
implicit overlayfs features: "index_all" and "verify_lower".
The "index_all" feature creates an index on copy up of every file and
directory. The "verify_lower" feature uses the full index to detect
overlay filesystem inconsistencies on lookup, like redirect from
multiple upper dirs to the same lower dir.

NFS export can be enabled for non-upper mount with no index. However,
because lower layer redirects cannot be verified with the index, enabling
NFS export support on an overlay with no upper layer requires turning off
redirect follow (e.g. "redirect_dir=nofollow").

The full index may incur some overhead on mount time, especially when
verifying that lower directory file handles are not stale.

NFS export support, full index and consistency verification will be
implemented by following patches.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/Kconfig     | 22 ++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/ovl_entry.h |  1 +
 fs/overlayfs/super.c     | 49 +++++++++++++++++++++++++++++++++++++++++++-----
 fs/overlayfs/util.c      | 16 ++++++++++++++++
 5 files changed, 85 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 9eac01c3e21e..406e72de88f6 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -50,3 +50,25 @@ config OVERLAY_FS_INDEX
 	  Note, that the inodes index feature is not backward compatible.
 	  That is, mounting an overlay which has an inodes index on a kernel
 	  that doesn't support this feature will have unexpected results.
+
+config OVERLAY_FS_NFS_EXPORT
+	bool "Overlayfs: turn on NFS export feature by default"
+	depends on OVERLAY_FS
+	depends on OVERLAY_FS_INDEX
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the inodes index dir to decode overlay NFS file handles by default.
+	  In this case, it is still possible to turn off NFS export support
+	  globally with the "nfs_export=off" module option or on a filesystem
+	  instance basis with the "nfs_export=off" mount option.
+
+	  The NFS export feature creates an index on copy up of every file and
+	  directory.  This full index is used to detect overlay filesystems
+	  inconsistencies on lookup, like redirect from multiple upper dirs to
+	  the same lower dir.  The full index may incur some overhead on mount
+	  time, especially when verifying that directory file handles are not
+	  stale.
+
+	  Note, that the NFS export feature is not backward compatible.
+	  That is, mounting an overlay which has a full index on a kernel
+	  that doesn't support this feature will have unexpected results.
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 1d62b1e6111a..db75955f9677 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -194,6 +194,8 @@ const struct cred *ovl_override_creds(struct super_block *sb);
 struct super_block *ovl_same_sb(struct super_block *sb);
 bool ovl_can_decode_fh(struct super_block *sb);
 struct dentry *ovl_indexdir(struct super_block *sb);
+bool ovl_index_all(struct super_block *sb);
+bool ovl_verify_lower(struct super_block *sb);
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 bool ovl_dentry_remote(struct dentry *dentry);
 bool ovl_dentry_weird(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 608e48755070..6dd60fcf8cb7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@ struct ovl_config {
 	bool redirect_follow;
 	const char *redirect_mode;
 	bool index;
+	bool nfs_export;
 };
 
 struct ovl_layer {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4ebbb368fce8..1d538be87fa0 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644);
 MODULE_PARM_DESC(ovl_index_def,
 		 "Default to on or off for the inodes index feature");
 
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(ovl_nfs_export_def,
+		 "Default to on or off for the NFS export feature");
+
 static void ovl_entry_stack_free(struct ovl_entry *oe)
 {
 	unsigned int i;
@@ -342,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 	if (ofs->config.index != ovl_index_def)
 		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+	if (ofs->config.nfs_export != ovl_nfs_export_def)
+		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
+						"on" : "off");
 	return 0;
 }
 
@@ -374,6 +382,8 @@ enum {
 	OPT_REDIRECT_DIR,
 	OPT_INDEX_ON,
 	OPT_INDEX_OFF,
+	OPT_NFS_EXPORT_ON,
+	OPT_NFS_EXPORT_OFF,
 	OPT_ERR,
 };
 
@@ -385,6 +395,8 @@ static const match_table_t ovl_tokens = {
 	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
 	{OPT_INDEX_ON,			"index=on"},
 	{OPT_INDEX_OFF,			"index=off"},
+	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
+	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
 	{OPT_ERR,			NULL}
 };
 
@@ -491,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->index = false;
 			break;
 
+		case OPT_NFS_EXPORT_ON:
+			config->nfs_export = true;
+			break;
+
+		case OPT_NFS_EXPORT_OFF:
+			config->nfs_export = false;
+			break;
+
 		default:
 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
@@ -696,13 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path,
 		*remote = true;
 
 	/*
-	 * The inodes index feature needs to encode and decode file
-	 * handles, so it requires that all layers support them.
+	 * The inodes index feature and NFS export need to encode and decode
+	 * file handles, so they require that all layers support them.
 	 */
-	if (ofs->config.index && ofs->config.upperdir &&
+	if ((ofs->config.nfs_export ||
+	     (ofs->config.index && ofs->config.upperdir)) &&
 	    !ovl_can_decode_fh(path->dentry->d_sb)) {
 		ofs->config.index = false;
-		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+		ofs->config.nfs_export = false;
+		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+			name);
 	}
 
 	return 0;
@@ -983,6 +1006,12 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
 	}
 
+	/* NFS export of r/w mount depends on index */
+	if (ofs->config.nfs_export && !ofs->config.index) {
+		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
+	}
+
 out:
 	mnt_drop_write(mnt);
 	return err;
@@ -1141,6 +1170,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
 	} else if (!ofs->config.upperdir && stacklen == 1) {
 		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
 		goto out_err;
+	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
+		   ofs->config.redirect_follow) {
+		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+		ofs->config.nfs_export = false;
 	}
 
 	err = -ENOMEM;
@@ -1217,6 +1250,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_err;
 
 	ofs->config.index = ovl_index_def;
+	ofs->config.nfs_export = ovl_nfs_export_def;
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
 		goto out_err;
@@ -1277,8 +1311,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* Show index=off in /proc/mounts for forced r/o mount */
-	if (!ofs->indexdir)
+	if (!ofs->indexdir) {
 		ofs->config.index = false;
+		if (ofs->upper_mnt && ofs->config.nfs_export) {
+			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+			ofs->config.nfs_export = false;
+		}
+	}
 
 	/* Never override disk quota limits or use reserved space */
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 06119f34a69d..ae81d878248e 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb)
 	return ofs->indexdir;
 }
 
+/* Index all files on copy up. For now only enabled for NFS export */
+bool ovl_index_all(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
+/* Verify lower origin on lookup. For now only enabled for NFS export */
+bool ovl_verify_lower(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.nfs_export && ofs->config.index;
+}
+
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
-- 
cgit 


From 37b12916c0f802d956c767db984801d3100c6524 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 10 Jan 2018 22:29:38 +0200
Subject: ovl: verify stored origin fh matches lower dir

When the NFS export feature is enabled, overlayfs implicitly enables the
feature "verify_lower". When the "verify_lower" feature is enabled, a
directory inode found in lower layer by name or by redirect_dir is
verified against the file handle of the copy up origin that is stored in
the upper layer.

This introduces a change of behavior for the case of lower layer
modification while overlay is offline. A lower directory created or
moved offline under an existing upper directory, will not be merged with
that upper directory.

The NFS export feature should not be used after copying layers, because
the new lower directory inodes would fail verification and won't be
merged with upper directories.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 11e164cb2593..69ca8eb07519 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -737,6 +737,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			}
 		}
 
+		/*
+		 * When "verify_lower" feature is enabled, do not merge with a
+		 * lower dir that does not match a stored origin xattr.
+		 */
+		if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
+			err = ovl_verify_origin(upperdentry, this, false);
+			if (err) {
+				dput(this);
+				break;
+			}
+		}
+
 		stack[ctr].dentry = this;
 		stack[ctr].layer = lower.layer;
 		ctr++;
-- 
cgit 


From 86eaa13046d5e814484c89f635a95b0342b765ad Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 21 Nov 2017 13:55:51 +0200
Subject: ovl: unbless lower st_ino of unverified origin

On a malformed overlay, several redirected dirs can point to the same
dir on a lower layer. This presents a similar challenge as broken
hardlinks, because different objects in the overlay can return the same
st_ino/st_dev pair from stat(2).

For broken hardlinks, we do not provide constant st_ino on copy up to
avoid this inconsistency. When NFS export feature is enabled, apply
the same logic to files and directories with unverified lower origin.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 94d2f8a8b779..96587075db11 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 			 * Lower hardlinks may be broken on copy up to different
 			 * upper files, so we cannot use the lower origin st_ino
 			 * for those different files, even for the same fs case.
+			 *
+			 * Similarly, several redirected dirs can point to the
+			 * same dir on a lower layer. With the "verify_lower"
+			 * feature, we do not use the lower origin st_ino, if
+			 * we haven't verified that this redirect is unique.
+			 *
 			 * With inodes index enabled, it is safe to use st_ino
-			 * of an indexed hardlinked origin. The index validates
-			 * that the upper hardlink is not broken.
+			 * of an indexed origin. The index validates that the
+			 * upper hardlink is not broken and that a redirected
+			 * dir is the only redirect to that origin.
 			 */
-			if (is_dir || lowerstat.nlink == 1 ||
-			    ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
+			    (!ovl_verify_lower(dentry->d_sb) &&
+			     (is_dir || lowerstat.nlink == 1)))
 				stat->ino = lowerstat.ino;
 
 			if (samefs)
-- 
cgit 


From ad1d615cec1c973aa222c065997a77e7cd5a0d17 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 10:47:03 +0200
Subject: ovl: use directory index entries for consistency verification

A directory index is a directory type entry in index dir with a
"trusted.overlay.upper" xattr containing an encoded ovl_fh of the merge
directory upper dir inode.

On lookup of non-dir files, lower file is followed by origin file handle.
On lookup of dir entries, lower dir is found by name and then compared
to origin file handle. We only trust dir index if we verified that lower
dir matches origin file handle, otherwise index may be inconsistent and
we ignore it.

If we find an indexed non-upper dir or an indexed merged dir, whose
index 'upper' xattr points to a different upper dir, that means that the
lower directory may be also referenced by another upper dir via redirect,
so we fail the lookup on inconsistency error.

To be consistent with directory index entries format, the association of
index dir to upper root dir, that was stored by older kernels in
"trusted.overlay.origin" xattr is now stored in "trusted.overlay.upper"
xattr. This also serves as an indication that overlay was mounted with a
kernel that supports index directory entries. For backward compatibility,
if an 'origin' xattr exists on the index dir we also verify it on mount.

Directory index entries are going to be used for NFS export.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     | 42 +++++++++++++++++++++++++++++++++++-------
 fs/overlayfs/overlayfs.h |  7 +++++++
 fs/overlayfs/super.c     | 20 ++++++++++++++++----
 3 files changed, 58 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 69ca8eb07519..b00d909e7326 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -538,6 +538,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 	struct dentry *index;
 	struct inode *inode;
 	struct qstr name;
+	bool is_dir = d_is_dir(origin);
 	int err;
 
 	err = ovl_get_index_name(origin, &name);
@@ -561,8 +562,6 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 	inode = d_inode(index);
 	if (d_is_negative(index)) {
 		goto out_dput;
-	} else if (upper && d_inode(upper) != inode) {
-		goto out_dput;
 	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
 		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
 		/*
@@ -576,8 +575,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 				    index, d_inode(index)->i_mode & S_IFMT,
 				    d_inode(origin)->i_mode & S_IFMT);
 		goto fail;
-	}
+	} else if (is_dir) {
+		if (!upper) {
+			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+					    origin, index);
+			goto fail;
+		}
 
+		/* Verify that dir index 'upper' xattr points to upper dir */
+		err = ovl_verify_upper(index, upper, false);
+		if (err) {
+			if (err == -ESTALE) {
+				pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+						    upper, origin, index);
+			}
+			goto fail;
+		}
+	} else if (upper && d_inode(upper) != inode) {
+		goto out_dput;
+	}
 out:
 	kfree(name.name);
 	return index;
@@ -646,6 +662,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct ovl_path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
+	struct dentry *origin = NULL;
 	struct dentry *index = NULL;
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
@@ -739,7 +756,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 		/*
 		 * When "verify_lower" feature is enabled, do not merge with a
-		 * lower dir that does not match a stored origin xattr.
+		 * lower dir that does not match a stored origin xattr. In any
+		 * case, only verified origin is used for index lookup.
 		 */
 		if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
 			err = ovl_verify_origin(upperdentry, this, false);
@@ -747,6 +765,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 				dput(this);
 				break;
 			}
+
+			/* Bless lower dir as verified origin */
+			origin = this;
 		}
 
 		stack[ctr].dentry = this;
@@ -780,10 +801,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 	}
 
-	/* Lookup index by lower inode and verify it matches upper inode */
-	if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
-		struct dentry *origin = stack[0].dentry;
+	/*
+	 * Lookup index by lower inode and verify it matches upper inode.
+	 * We only trust dir index if we verified that lower dir matches
+	 * origin, otherwise dir index entries may be inconsistent and we
+	 * ignore them. Always lookup index of non-dir and non-upper.
+	 */
+	if (ctr && (!upperdentry || !d.is_dir))
+		origin = stack[0].dentry;
 
+	if (origin && ovl_indexdir(dentry->d_sb) &&
+	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
 		index = ovl_lookup_index(dentry, upperdentry, origin);
 		if (IS_ERR(index)) {
 			err = PTR_ERR(index);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index db75955f9677..25794a3a3fe1 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -27,6 +27,7 @@ enum ovl_path_type {
 #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
 #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
 #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
+#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
 
 enum ovl_flag {
 	/* Pure upper dir that may contain non pure upper entries */
@@ -266,6 +267,12 @@ static inline int ovl_verify_origin(struct dentry *upper,
 	return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
 }
 
+static inline int ovl_verify_upper(struct dentry *index,
+				    struct dentry *upper, bool set)
+{
+	return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+}
+
 /* readdir.c */
 extern const struct file_operations ovl_dir_operations;
 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1d538be87fa0..170c184a9f43 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1078,11 +1078,23 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 
 	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
 	if (ofs->indexdir) {
-		/* Verify upper root is exclusively associated with index dir */
-		err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
-					upperpath->dentry, true, true);
+		/*
+		 * Verify upper root is exclusively associated with index dir.
+		 * Older kernels stored upper fh in "trusted.overlay.origin"
+		 * xattr. If that xattr exists, verify that it is a match to
+		 * upper dir file handle. In any case, verify or set xattr
+		 * "trusted.overlay.upper" to indicate that index may have
+		 * directory entries.
+		 */
+		if (ovl_check_origin_xattr(ofs->indexdir)) {
+			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+						upperpath->dentry, true, false);
+			if (err)
+				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+		}
+		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
 		if (err)
-			pr_err("overlayfs: failed to verify index dir origin\n");
+			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
 
 		/* Cleanup bad/stale/orphan index entries */
 		if (!err)
-- 
cgit 


From 7db25d36d9253c58afd3db837dd53e66ae3b1ac9 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 11:03:13 +0200
Subject: ovl: verify whiteout index entries on mount

Whiteout index entries are used as an indication that an exported
overlay file handle should be treated as stale (i.e. after unlink
of the overlay inode).

Check on mount that whiteout index entries have a name that looks like
a valid file handle and cleanup invalid index entries.

For whiteout index entries, do not check that they also have valid
origin fh and nlink xattr, because those xattr do not exist for a
whiteout index entry.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index b00d909e7326..c6c79753b3b3 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -439,16 +439,13 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	/*
 	 * Directory index entries are going to be used for looking up
 	 * redirected upper dirs by lower dir fh when decoding an overlay
-	 * file handle of a merge dir. Whiteout index entries are going to be
-	 * used as an indication that an exported overlay file handle should
-	 * be treated as stale (i.e. after unlink of the overlay inode).
-	 * We don't know the verification rules for directory and whiteout
-	 * index entries, because they have not been implemented yet, so return
-	 * EINVAL if those entries are found to abort the mount to avoid
-	 * corrupting an index that was created by a newer kernel.
+	 * file handle of a merge dir.  We don't know the verification rules
+	 * for directory index entries, because they have not been implemented
+	 * yet, so return EINVAL if those entries are found to abort the mount
+	 * and to avoid corrupting an index that was created by a newer kernel.
 	 */
 	err = -EINVAL;
-	if (d_is_dir(index) || ovl_is_whiteout(index))
+	if (d_is_dir(index))
 		goto fail;
 
 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
@@ -468,6 +465,14 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	if (err)
 		goto fail;
 
+	/*
+	 * Whiteout index entries are used as an indication that an exported
+	 * overlay file handle should be treated as stale (i.e. after unlink
+	 * of the overlay inode). These entries contain no origin xattr.
+	 */
+	if (ovl_is_whiteout(index))
+		goto out;
+
 	err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh);
 	if (err)
 		goto fail;
-- 
cgit 


From e8f9e5b780b0406ab81add72f1a05583ae5d40ac Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 11:33:24 +0200
Subject: ovl: verify directory index entries on mount

Directory index entries should have 'upper' xattr pointing to the real
upper dir. Verifying that the upper dir file handle is not stale is
expensive, so only verify stale directory index entries on mount if
NFS export feature is enabled.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/dcache.c          |   1 +
 fs/overlayfs/namei.c | 125 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 94 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 5c7df1df81ff..b5d5ea984ac4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3527,6 +3527,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 
 	return result;
 }
+EXPORT_SYMBOL(is_subdir);
 
 static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
 {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c6c79753b3b3..881caa385a36 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -84,7 +84,19 @@ invalid:
 
 static int ovl_acceptable(void *ctx, struct dentry *dentry)
 {
-	return 1;
+	/*
+	 * A non-dir origin may be disconnected, which is fine, because
+	 * we only need it for its unique inode number.
+	 */
+	if (!d_is_dir(dentry))
+		return 1;
+
+	/* Don't decode a deleted empty directory */
+	if (d_unhashed(dentry))
+		return 0;
+
+	/* Check if directory belongs to the layer we are decoding from */
+	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
 }
 
 /*
@@ -160,7 +172,7 @@ invalid:
 
 static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
 {
-	struct dentry *origin;
+	struct dentry *real;
 	int bytes;
 
 	/*
@@ -171,22 +183,28 @@ static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
 		return NULL;
 
 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
-	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
-				    bytes >> 2, (int)fh->type,
-				    ovl_acceptable, NULL);
-	if (IS_ERR(origin)) {
-		/* Treat stale file handle as "origin unknown" */
-		if (origin == ERR_PTR(-ESTALE))
-			origin = NULL;
-		return origin;
+	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+				  bytes >> 2, (int)fh->type,
+				  ovl_acceptable, mnt);
+	if (IS_ERR(real)) {
+		/*
+		 * Treat stale file handle to lower file as "origin unknown".
+		 * upper file handle could become stale when upper file is
+		 * unlinked and this information is needed to handle stale
+		 * index entries correctly.
+		 */
+		if (real == ERR_PTR(-ESTALE) &&
+		    !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+			real = NULL;
+		return real;
 	}
 
-	if (ovl_dentry_weird(origin)) {
-		dput(origin);
+	if (ovl_dentry_weird(real)) {
+		dput(real);
 		return NULL;
 	}
 
-	return origin;
+	return real;
 }
 
 static bool ovl_is_opaquedir(struct dentry *dentry)
@@ -420,6 +438,35 @@ fail:
 	goto out;
 }
 
+/* Get upper dentry from index */
+static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+{
+	struct ovl_fh *fh;
+	struct dentry *upper;
+
+	if (!d_is_dir(index))
+		return dget(index);
+
+	fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+	if (IS_ERR_OR_NULL(fh))
+		return ERR_CAST(fh);
+
+	upper = ovl_decode_fh(fh, ofs->upper_mnt);
+	kfree(fh);
+
+	if (IS_ERR_OR_NULL(upper))
+		return upper ?: ERR_PTR(-ESTALE);
+
+	if (!d_is_dir(upper)) {
+		pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+				    index, upper);
+		dput(upper);
+		return ERR_PTR(-EIO);
+	}
+
+	return upper;
+}
+
 /*
  * Verify that an index entry name matches the origin file handle stored in
  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
@@ -431,23 +478,13 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	size_t len;
 	struct ovl_path origin = { };
 	struct ovl_path *stack = &origin;
+	struct dentry *upper = NULL;
 	int err;
 
 	if (!d_inode(index))
 		return 0;
 
-	/*
-	 * Directory index entries are going to be used for looking up
-	 * redirected upper dirs by lower dir fh when decoding an overlay
-	 * file handle of a merge dir.  We don't know the verification rules
-	 * for directory index entries, because they have not been implemented
-	 * yet, so return EINVAL if those entries are found to abort the mount
-	 * and to avoid corrupting an index that was created by a newer kernel.
-	 */
 	err = -EINVAL;
-	if (d_is_dir(index))
-		goto fail;
-
 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
 		goto fail;
 
@@ -473,21 +510,45 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	if (ovl_is_whiteout(index))
 		goto out;
 
-	err = ovl_verify_fh(index, OVL_XATTR_ORIGIN, fh);
-	if (err)
+	/*
+	 * Verifying directory index entries are not stale is expensive, so
+	 * only verify stale dir index if NFS export is enabled.
+	 */
+	if (d_is_dir(index) && !ofs->config.nfs_export)
+		goto out;
+
+	/*
+	 * Directory index entries should have 'upper' xattr pointing to the
+	 * real upper dir. Non-dir index entries are hardlinks to the upper
+	 * real inode. For non-dir index, we can read the copy up origin xattr
+	 * directly from the index dentry, but for dir index we first need to
+	 * decode the upper directory.
+	 */
+	upper = ovl_index_upper(ofs, index);
+	if (IS_ERR_OR_NULL(upper)) {
+		err = PTR_ERR(upper);
+		if (!err)
+			err = -ESTALE;
 		goto fail;
+	}
 
-	err = ovl_check_origin_fh(ofs, fh, index, &stack);
+	err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+	dput(upper);
 	if (err)
 		goto fail;
 
-	/* Check if index is orphan and don't warn before cleaning it */
-	if (d_inode(index)->i_nlink == 1 &&
-	    ovl_get_nlink(origin.dentry, index, 0) == 0)
-		err = -ENOENT;
+	/* Check if non-dir index is orphan and don't warn before cleaning it */
+	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
+		err = ovl_check_origin_fh(ofs, fh, index, &stack);
+		if (err)
+			goto fail;
+
+		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+			err = -ENOENT;
+	}
 
-	dput(origin.dentry);
 out:
+	dput(origin.dentry);
 	kfree(fh);
 	return err;
 
-- 
cgit 


From 9ee60ce2491166c73a381e5f04dc4c3a147e169d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 1 Nov 2017 10:13:51 +0200
Subject: ovl: cleanup temp index entries

A previous failed attempt to create or whiteout a directory index may
leave index entries named '#%x' in the index dir. Clean up those temp
entries on mount instead of failing the mount.

In the future, we may drop 'work' dir and use 'index' dir instead.
This change is enough for cleaning up copy up leftovers 'from the future',
but it is not enough for cleaning up rmdir leftovers 'from the future'
(i.e. temp dir containing whiteouts).

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 881caa385a36..7f27ec5999ea 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -9,6 +9,7 @@
 
 #include <linux/fs.h>
 #include <linux/cred.h>
+#include <linux/ctype.h>
 #include <linux/namei.h>
 #include <linux/xattr.h>
 #include <linux/ratelimit.h>
@@ -467,6 +468,12 @@ static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 	return upper;
 }
 
+/* Is this a leftover from create/whiteout of directory index entry? */
+static bool ovl_is_temp_index(struct dentry *index)
+{
+	return index->d_name.name[0] == '#';
+}
+
 /*
  * Verify that an index entry name matches the origin file handle stored in
  * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
@@ -484,6 +491,11 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	if (!d_inode(index))
 		return 0;
 
+	/* Cleanup leftover from index create/cleanup attempt */
+	err = -ESTALE;
+	if (ovl_is_temp_index(index))
+		goto fail;
+
 	err = -EINVAL;
 	if (index->d_name.len < sizeof(struct ovl_fh)*2)
 		goto fail;
-- 
cgit 


From 24b33ee104ecd5a4e1e71412f8966199d6a0bf02 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 26 Sep 2017 07:55:26 +0300
Subject: ovl: create ovl_need_index() helper

The helper determines which lower file needs to be indexed
on copy up and before nlink changes.

For index=on, the helper evaluates to true for lower hardlinks.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c   |  6 +-----
 fs/overlayfs/overlayfs.h |  1 +
 fs/overlayfs/util.c      | 24 ++++++++++++++++++++----
 3 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 503c92404095..103e62dcb745 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -536,11 +536,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
 	int err;
 	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
-	bool indexed = false;
-
-	if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
-	    c->stat.nlink > 1)
-		indexed = true;
+	bool indexed = ovl_need_index(c->dentry);
 
 	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
 		c->origin = true;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 25794a3a3fe1..d7e65284c13b 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -241,6 +241,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode);
 bool ovl_test_flag(unsigned long flag, struct inode *inode);
 bool ovl_inuse_trylock(struct dentry *dentry);
 void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_need_index(struct dentry *dentry);
 int ovl_nlink_start(struct dentry *dentry, bool *locked);
 void ovl_nlink_end(struct dentry *dentry, bool locked);
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ae81d878248e..55ab99131a72 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -463,6 +463,23 @@ void ovl_inuse_unlock(struct dentry *dentry)
 	}
 }
 
+/*
+ * Does this overlay dentry need to be indexed on copy up?
+ */
+bool ovl_need_index(struct dentry *dentry)
+{
+	struct dentry *lower = ovl_dentry_lower(dentry);
+
+	if (!lower || !ovl_indexdir(dentry->d_sb))
+		return false;
+
+	/* Index only lower hardlinks on copy up */
+	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return true;
+
+	return false;
+}
+
 /* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
@@ -533,11 +550,11 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 
 	/*
 	 * With inodes index is enabled, we store the union overlay nlink
-	 * in an xattr on the index inode. When whiting out lower hardlinks
+	 * in an xattr on the index inode. When whiting out an indexed lower,
 	 * we need to decrement the overlay persistent nlink, but before the
 	 * first copy up, we have no upper index inode to store the xattr.
 	 *
-	 * As a workaround, before whiteout/rename over of a lower hardlink,
+	 * As a workaround, before whiteout/rename over an indexed lower,
 	 * copy up to create the upper index. Creating the upper index will
 	 * initialize the overlay nlink, so it could be dropped if unlink
 	 * or rename succeeds.
@@ -545,8 +562,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 	 * TODO: implement metadata only index copy up when called with
 	 *       ovl_copy_up_flags(dentry, O_PATH).
 	 */
-	if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
-	    d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+	if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
 		err = ovl_copy_up(dentry);
 		if (err)
 			return err;
-- 
cgit 


From fbd2d2074bde2e333d9a9eeda5864cd593fbe29c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 22 Nov 2017 00:08:21 +0200
Subject: ovl: index all non-dir on copy up for NFS export

With the NFS export feature enabled, all non-dir are indexed on copy up.
The copy up origin inode of an indexed non-dir can be used as a unique
identifier of the overlay object.

The full index is also used for consistency verification, like detecting
multiple non-hardlink uppers with the same 'origin' on lookup.

Directory index on copy up will be implemented by the following patch.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/util.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 55ab99131a72..7cb930e367be 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -473,6 +473,10 @@ bool ovl_need_index(struct dentry *dentry)
 	if (!lower || !ovl_indexdir(dentry->d_sb))
 		return false;
 
+	/* Index all files for NFS export and consistency verification */
+	if (!d_is_dir(lower) && ovl_index_all(dentry->d_sb))
+		return true;
+
 	/* Index only lower hardlinks on copy up */
 	if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
 		return true;
-- 
cgit 


From 016b720f5558d825bc0a4c6d2bdd6929fbe86536 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 14:01:08 +0200
Subject: ovl: index directories on copy up for NFS export

With the NFS export feature enabled, all dirs are indexed on copy up.
Non-dir files are copied up directly to indexdir and then hardlinked
to upper dir.

Directories are copied up to indexdir, then an index entry is created
in indexdir with 'upper' xattr pointing to the copied up dir and then
the copied up dir is moved to upper dir.

Directory index is also used for consistency verification, like
detecting multiple redirected dirs to the same lower dir on lookup.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/overlayfs/util.c    |   2 +-
 2 files changed, 117 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 103e62dcb745..8ef25d8c3cfe 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -315,6 +315,94 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
+/* Store file handle of @upper dir in @index dir entry */
+static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+{
+	const struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_fh(upper, true);
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+
+	kfree(fh);
+	return err;
+}
+
+/*
+ * Create and install index entry.
+ *
+ * Caller must hold i_mutex on indexdir.
+ */
+static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+			    struct dentry *upper)
+{
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = d_inode(indexdir);
+	struct dentry *index = NULL;
+	struct dentry *temp = NULL;
+	struct qstr name = { };
+	int err;
+
+	/*
+	 * For now this is only used for creating index entry for directories,
+	 * because non-dir are copied up directly to index and then hardlinked
+	 * to upper dir.
+	 *
+	 * TODO: implement create index for non-dir, so we can call it when
+	 * encoding file handle for non-dir in case index does not exist.
+	 */
+	if (WARN_ON(!d_is_dir(dentry)))
+		return -EIO;
+
+	/* Directory not expected to be indexed before copy up */
+	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
+		return -EIO;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return err;
+
+	temp = ovl_lookup_temp(indexdir);
+	if (IS_ERR(temp))
+		goto temp_err;
+
+	err = ovl_do_mkdir(dir, temp, S_IFDIR, true);
+	if (err)
+		goto out;
+
+	err = ovl_set_upper_fh(upper, temp);
+	if (err)
+		goto out_cleanup;
+
+	index = lookup_one_len(name.name, indexdir, name.len);
+	if (IS_ERR(index)) {
+		err = PTR_ERR(index);
+	} else {
+		err = ovl_do_rename(dir, temp, dir, index, 0);
+		dput(index);
+	}
+
+	if (err)
+		goto out_cleanup;
+
+out:
+	dput(temp);
+	kfree(name.name);
+	return err;
+
+temp_err:
+	err = PTR_ERR(temp);
+	temp = NULL;
+	goto out;
+
+out_cleanup:
+	ovl_cleanup(dir, temp);
+	goto out;
+}
+
 struct ovl_copy_up_ctx {
 	struct dentry *parent;
 	struct dentry *dentry;
@@ -327,6 +415,7 @@ struct ovl_copy_up_ctx {
 	struct dentry *workdir;
 	bool tmpfile;
 	bool origin;
+	bool indexed;
 };
 
 static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -498,6 +587,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 	if (err)
 		goto out_cleanup;
 
+	if (S_ISDIR(c->stat.mode) && c->indexed) {
+		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+		if (err)
+			goto out_cleanup;
+	}
+
 	if (c->tmpfile) {
 		inode_lock_nested(udir, I_MUTEX_PARENT);
 		err = ovl_install_temp(c, temp, &newdentry);
@@ -536,12 +631,26 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
 	int err;
 	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
-	bool indexed = ovl_need_index(c->dentry);
+	bool to_index = false;
 
-	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+	/*
+	 * Indexed non-dir is copied up directly to the index entry and then
+	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
+	 * then index entry is created and then copied up dir installed.
+	 * Copying dir up to indexdir instead of workdir simplifies locking.
+	 */
+	if (ovl_need_index(c->dentry)) {
+		c->indexed = true;
+		if (S_ISDIR(c->stat.mode))
+			c->workdir = ovl_indexdir(c->dentry->d_sb);
+		else
+			to_index = true;
+	}
+
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
 		c->origin = true;
 
-	if (indexed) {
+	if (to_index) {
 		c->destdir = ovl_indexdir(c->dentry->d_sb);
 		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
 		if (err)
@@ -568,9 +677,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		}
 	}
 
-	if (indexed) {
-		if (!err)
-			ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+	if (!err && c->indexed)
+		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+
+	if (to_index) {
 		kfree(c->destname.name);
 	} else if (!err) {
 		struct inode *udir = d_inode(c->destdir);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 7cb930e367be..71ddc4f8864e 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -474,7 +474,7 @@ bool ovl_need_index(struct dentry *dentry)
 		return false;
 
 	/* Index all files for NFS export and consistency verification */
-	if (!d_is_dir(lower) && ovl_index_all(dentry->d_sb))
+	if (ovl_index_all(dentry->d_sb))
 		return true;
 
 	/* Index only lower hardlinks on copy up */
-- 
cgit 


From 89a17556ce4d113f6e7896e118a14f79a84484e9 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 26 Sep 2017 07:40:37 +0300
Subject: ovl: cleanup dir index when dir nlink drops to zero

When non-dir index union nlink drops to zero the non-dir index
is cleaned. Do the same for directory type index entries when
union directory is removed.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/util.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 71ddc4f8864e..6b11e116f190 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -500,7 +500,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 		goto fail;
 
 	inode = d_inode(upperdentry);
-	if (inode->i_nlink != 1) {
+	if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
 		pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
 				    upperdentry, inode->i_ino, inode->i_nlink);
 		/*
@@ -549,7 +549,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 	const struct cred *old_cred;
 	int err;
 
-	if (!d_inode(dentry) || d_is_dir(dentry))
+	if (!d_inode(dentry))
 		return 0;
 
 	/*
@@ -576,7 +576,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 	if (err)
 		return err;
 
-	if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
 		goto out;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
-- 
cgit 


From e7dd0e71348c1e3bc4b9d767c1ffbcbdee46a726 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 24 Oct 2017 17:38:33 +0300
Subject: ovl: whiteout index when union nlink drops to zero

With NFS export feature enabled, when overlay inode nlink drops to
zero, instead of removing the index entry, replace it with a whiteout
index entry.

This is needed for NFS export in order to prevent future open by handle
from opening the lower file directly.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       | 58 +++++++++++++++++++++++++++++-------------------
 fs/overlayfs/overlayfs.h |  2 ++
 fs/overlayfs/util.c      | 17 +++++++++-----
 3 files changed, 48 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a1a7606d4891..839709c7803a 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 }
 
 /* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir,
-				   struct dentry *dentry)
+static struct dentry *ovl_whiteout(struct dentry *workdir)
 {
 	int err;
 	struct dentry *whiteout;
@@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
 	return whiteout;
 }
 
+/* Caller must hold i_mutex on both workdir and dir */
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+			     struct dentry *dentry)
+{
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *whiteout;
+	int err;
+	int flags = 0;
+
+	whiteout = ovl_whiteout(workdir);
+	err = PTR_ERR(whiteout);
+	if (IS_ERR(whiteout))
+		return err;
+
+	if (d_is_dir(dentry))
+		flags = RENAME_EXCHANGE;
+
+	err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
+	if (err)
+		goto kill_whiteout;
+	if (flags)
+		ovl_cleanup(wdir, dentry);
+
+out:
+	dput(whiteout);
+	return err;
+
+kill_whiteout:
+	ovl_cleanup(wdir, whiteout);
+	goto out;
+}
+
 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 		    struct cattr *attr, struct dentry *hardlink, bool debug)
 {
@@ -591,14 +622,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 				   struct list_head *list)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
-	struct inode *wdir = workdir->d_inode;
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
-	struct inode *udir = upperdir->d_inode;
-	struct dentry *whiteout;
 	struct dentry *upper;
 	struct dentry *opaquedir = NULL;
 	int err;
-	int flags = 0;
 
 	if (WARN_ON(!workdir))
 		return -EROFS;
@@ -627,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 		goto out_dput_upper;
 	}
 
-	whiteout = ovl_whiteout(workdir, dentry);
-	err = PTR_ERR(whiteout);
-	if (IS_ERR(whiteout))
-		goto out_dput_upper;
-
-	if (d_is_dir(upper))
-		flags = RENAME_EXCHANGE;
-
-	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
 	if (err)
-		goto kill_whiteout;
-	if (flags)
-		ovl_cleanup(wdir, upper);
+		goto out_d_drop;
 
 	ovl_dentry_version_inc(dentry->d_parent, true);
 out_d_drop:
 	d_drop(dentry);
-	dput(whiteout);
 out_dput_upper:
 	dput(upper);
 out_unlock:
@@ -653,10 +669,6 @@ out_dput:
 	dput(opaquedir);
 out:
 	return err;
-
-kill_whiteout:
-	ovl_cleanup(wdir, whiteout);
-	goto out_d_drop;
 }
 
 static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d7e65284c13b..8f4313c6693b 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -321,6 +321,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
 struct dentry *ovl_lookup_temp(struct dentry *workdir);
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+			     struct dentry *dentry);
 struct cattr {
 	dev_t rdev;
 	umode_t mode;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6b11e116f190..aa2234d52007 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -487,7 +487,8 @@ bool ovl_need_index(struct dentry *dentry)
 /* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
-	struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+	struct inode *dir = indexdir->d_inode;
 	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
 	struct dentry *upperdentry = ovl_dentry_upper(dentry);
 	struct dentry *index = NULL;
@@ -518,13 +519,17 @@ static void ovl_cleanup_index(struct dentry *dentry)
 	}
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
-	/* TODO: whiteout instead of cleanup to block future open by handle */
-	index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+	index = lookup_one_len(name.name, indexdir, name.len);
 	err = PTR_ERR(index);
-	if (!IS_ERR(index))
-		err = ovl_cleanup(dir, index);
-	else
+	if (IS_ERR(index)) {
 		index = NULL;
+	} else if (ovl_index_all(dentry->d_sb)) {
+		/* Whiteout orphan index to block future open by handle */
+		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+	} else {
+		/* Cleanup orphan index entries */
+		err = ovl_cleanup(dir, index);
+	}
 
 	inode_unlock(dir);
 	if (err)
-- 
cgit 


From 24f0b17203691d22815e842051a014e3bde7c227 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Jan 2018 15:33:51 +0200
Subject: ovl: whiteout orphan index entries on mount

Orphan index entries are non-dir index entries whose union nlink count
dropped to zero. With index=on, orphan index entries are removed on
mount. With NFS export feature enabled, orphan index entries are replaced
with whiteout index entries to block future open by handle from opening
the lower file.

When dir index has a stale 'upper' xattr, we assume that the upper dir
was removed and we treat the dir index as orphan entry that needs to be
whited out or removed.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c   | 19 +++++++++++++++++--
 fs/overlayfs/readdir.c | 25 +++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 7f27ec5999ea..111a64f904c2 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -539,7 +539,15 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 	upper = ovl_index_upper(ofs, index);
 	if (IS_ERR_OR_NULL(upper)) {
 		err = PTR_ERR(upper);
-		if (!err)
+		/*
+		 * Directory index entries with no 'upper' xattr need to be
+		 * removed. When dir index entry has a stale 'upper' xattr,
+		 * we assume that upper dir was removed and we treat the dir
+		 * index as orphan entry that needs to be whited out.
+		 */
+		if (err == -ESTALE)
+			goto orphan;
+		else if (!err)
 			err = -ESTALE;
 		goto fail;
 	}
@@ -556,7 +564,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 			goto fail;
 
 		if (ovl_get_nlink(origin.dentry, index, 0) == 0)
-			err = -ENOENT;
+			goto orphan;
 	}
 
 out:
@@ -568,6 +576,13 @@ fail:
 	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
 			    index, d_inode(index)->i_mode & S_IFMT, err);
 	goto out;
+
+orphan:
+	pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+			    index, d_inode(index)->i_mode & S_IFMT,
+			    d_inode(index)->i_nlink);
+	err = -ENOENT;
+	goto out;
 }
 
 /*
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 4c660c7085b7..c11f5c0906c3 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1067,12 +1067,33 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 			break;
 		}
 		err = ovl_verify_index(ofs, index);
-		/* Cleanup stale and orphan index entries */
-		if (err && (err == -ESTALE || err == -ENOENT))
+		if (!err) {
+			goto next;
+		} else if (err == -ESTALE) {
+			/* Cleanup stale index entries */
 			err = ovl_cleanup(dir, index);
+		} else if (err != -ENOENT) {
+			/*
+			 * Abort mount to avoid corrupting the index if
+			 * an incompatible index entry was found or on out
+			 * of memory.
+			 */
+			break;
+		} else if (ofs->config.nfs_export) {
+			/*
+			 * Whiteout orphan index to block future open by
+			 * handle after overlay nlink dropped to zero.
+			 */
+			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		} else {
+			/* Cleanup orphan index entries */
+			err = ovl_cleanup(dir, index);
+		}
+
 		if (err)
 			break;
 
+next:
 		dput(index);
 		index = NULL;
 	}
-- 
cgit 


From 91ffe7beb31e7e1e689a59f5ef56acea0811d81c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 28 Dec 2017 20:23:05 +0200
Subject: ovl: factor out ovl_get_index_fh() helper

The helper is needed to lookup an index by file handle for NFS export.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     | 59 ++++++++++++++++++++++++++++++++++++++++--------
 fs/overlayfs/overlayfs.h |  1 +
 2 files changed, 50 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 111a64f904c2..49984c9f3689 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -585,6 +585,21 @@ orphan:
 	goto out;
 }
 
+static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
+{
+	char *n, *s;
+
+	n = kzalloc(fh->len * 2, GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+
+	s  = bin2hex(n, fh, fh->len);
+	*name = (struct qstr) QSTR_INIT(n, s - n);
+
+	return 0;
+
+}
+
 /*
  * Lookup in indexdir for the index entry of a lower real inode or a copy up
  * origin inode. The index entry name is the hex representation of the lower
@@ -602,25 +617,49 @@ orphan:
  */
 int ovl_get_index_name(struct dentry *origin, struct qstr *name)
 {
-	int err;
 	struct ovl_fh *fh;
-	char *n, *s;
+	int err;
 
 	fh = ovl_encode_fh(origin, false);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
-	err = -ENOMEM;
-	n = kzalloc(fh->len * 2, GFP_KERNEL);
-	if (n) {
-		s  = bin2hex(n, fh, fh->len);
-		*name = (struct qstr) QSTR_INIT(n, s - n);
-		err = 0;
-	}
-	kfree(fh);
+	err = ovl_get_index_name_fh(fh, name);
 
+	kfree(fh);
 	return err;
+}
+
+/* Lookup index by file handle for NFS export */
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
+{
+	struct dentry *index;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name_fh(fh, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	kfree(name.name);
+	if (IS_ERR(index)) {
+		if (PTR_ERR(index) == -ENOENT)
+			index = NULL;
+		return index;
+	}
 
+	if (d_is_negative(index))
+		err = 0;
+	else if (ovl_is_whiteout(index))
+		err = -ESTALE;
+	else if (ovl_dentry_weird(index))
+		err = -EIO;
+	else
+		return index;
+
+	dput(index);
+	return ERR_PTR(err);
 }
 
 static struct dentry *ovl_lookup_index(struct dentry *dentry,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 8f4313c6693b..4e784f6ff484 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -257,6 +257,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 		      struct dentry *real, bool is_upper, bool set);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags);
-- 
cgit 


From 0aceb53e73befee4441c9e68d23cb4f682382171 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 12 Dec 2017 23:43:16 +0200
Subject: ovl: do not pass overlay dentry to ovl_get_inode()

This is needed for using ovl_get_inode() for decoding file handles
for NFS export.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c     | 16 +++++++---------
 fs/overlayfs/namei.c     |  3 ++-
 fs/overlayfs/overlayfs.h |  5 +++--
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 96587075db11..f8f7facb7331 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -643,14 +643,14 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
 	return true;
 }
 
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
-			    struct dentry *index)
+struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
+			    struct dentry *lowerdentry, struct dentry *index,
+			    unsigned int numlower)
 {
-	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
 	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
 	/* Already indexed or could be indexed on copy up? */
-	bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry));
+	bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
 	struct dentry *origin = indexed ? lowerdentry : NULL;
 	bool is_dir;
 
@@ -675,7 +675,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 		struct inode *key = d_inode(origin ?: upperdentry);
 		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
 
-		inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+		inode = iget5_locked(sb, (unsigned long) key,
 				     ovl_inode_test, ovl_inode_set, key);
 		if (!inode)
 			goto out_nomem;
@@ -699,7 +699,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
 		set_nlink(inode, nlink);
 	} else {
-		inode = new_inode(dentry->d_sb);
+		inode = new_inode(sb);
 		if (!inode)
 			goto out_nomem;
 	}
@@ -711,9 +711,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
 
 	/* Check for non-merge dir that may have whiteouts */
 	if (is_dir) {
-		struct ovl_entry *oe = dentry->d_fsdata;
-
-		if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
+		if (((upperdentry && lowerdentry) || numlower > 1) ||
 		    ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
 			ovl_set_flag(OVL_WHITEOUTS, inode);
 		}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 49984c9f3689..d69ea0a385f6 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -967,7 +967,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		upperdentry = dget(index);
 
 	if (upperdentry || ctr) {
-		inode = ovl_get_inode(dentry, upperdentry, index);
+		inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
+				      ctr);
 		err = PTR_ERR(inode);
 		if (IS_ERR(inode))
 			goto out_free_oe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4e784f6ff484..f9fce7a680cd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -307,8 +307,9 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
-			    struct dentry *index);
+struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
+			    struct dentry *lowerdentry, struct dentry *index,
+			    unsigned int numlower);
 static inline void ovl_copyattr(struct inode *from, struct inode *to)
 {
 	to->i_uid = from->i_uid;
-- 
cgit 


From 829c28be9bb9a05aa7eeb8a68a4536cca2d3d694 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 29 Sep 2017 21:43:07 +0300
Subject: ovl: use d_splice_alias() in place of d_add() in lookup

This is required for NFS export.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index d69ea0a385f6..9d3ccbd95dde 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -982,9 +982,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	dput(index);
 	kfree(stack);
 	kfree(d.redirect);
-	d_add(dentry, inode);
-
-	return NULL;
+	return d_splice_alias(inode, dentry);
 
 out_free_oe:
 	dentry->d_fsdata = NULL;
-- 
cgit 


From aa3ff3c152ff94ef045ed802db7535167f8a21ab Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 15 Oct 2017 18:00:20 +0300
Subject: ovl: copy up of disconnected dentries

With NFS export, some operations on decoded file handles (e.g. open,
link, setattr, xattr_set) may call copy up with a disconnected non-dir.
In this case, we will copy up lower inode to index dir without
linking it to upper dir.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 56 ++++++++++++++++++++++++++++++++++++--------------
 fs/overlayfs/inode.c   |  4 +++-
 fs/overlayfs/util.c    |  7 ++++---
 3 files changed, 48 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 8ef25d8c3cfe..d855f508fa20 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -450,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 		}
 	}
 	inode_unlock(udir);
-	ovl_set_nlink_upper(c->dentry);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_upper(c->dentry);
 
 	return err;
 }
@@ -655,6 +658,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
 		if (err)
 			return err;
+	} else if (WARN_ON(!c->parent)) {
+		/* Disconnected dentry must be copied up to index dir */
+		return -EIO;
 	} else {
 		/*
 		 * Mark parent "impure" because it may now contain non-pure
@@ -677,12 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		}
 	}
 
-	if (!err && c->indexed)
+
+	if (err)
+		goto out;
+
+	if (c->indexed)
 		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
 
 	if (to_index) {
-		kfree(c->destname.name);
-	} else if (!err) {
+		/* Initialize nlink for copy up of disconnected dentry */
+		err = ovl_set_nlink_upper(c->dentry);
+	} else {
 		struct inode *udir = d_inode(c->destdir);
 
 		/* Restore timestamps on parent (best effort) */
@@ -693,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		ovl_dentry_set_upper_alias(c->dentry);
 	}
 
+out:
+	if (to_index)
+		kfree(c->destname.name);
 	return err;
 }
 
@@ -717,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	if (err)
 		return err;
 
-	ovl_path_upper(parent, &parentpath);
-	ctx.destdir = parentpath.dentry;
-	ctx.destname = dentry->d_name;
+	if (parent) {
+		ovl_path_upper(parent, &parentpath);
+		ctx.destdir = parentpath.dentry;
+		ctx.destname = dentry->d_name;
 
-	err = vfs_getattr(&parentpath, &ctx.pstat,
-			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
-	if (err)
-		return err;
+		err = vfs_getattr(&parentpath, &ctx.pstat,
+				  STATX_ATIME | STATX_MTIME,
+				  AT_STATX_SYNC_AS_STAT);
+		if (err)
+			return err;
+	}
 
 	/* maybe truncate regular file. this has no effect on dirs */
 	if (flags & O_TRUNC)
@@ -745,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	} else {
 		if (!ovl_dentry_upper(dentry))
 			err = ovl_do_copy_up(&ctx);
-		if (!err && !ovl_dentry_has_upper_alias(dentry))
+		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
 			err = ovl_link_up(&ctx);
 		ovl_copy_up_end(dentry);
 	}
@@ -758,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 {
 	int err = 0;
 	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
+	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
+
+	/*
+	 * With NFS export, copy up can get called for a disconnected non-dir.
+	 * In this case, we will copy up lower inode to index dir without
+	 * linking it to upper dir.
+	 */
+	if (WARN_ON(disconnected && d_is_dir(dentry)))
+		return -EIO;
 
 	while (!err) {
 		struct dentry *next;
-		struct dentry *parent;
+		struct dentry *parent = NULL;
 
 		/*
 		 * Check if copy-up has happened as well as for upper alias (in
@@ -777,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 		 *      with rename.
 		 */
 		if (ovl_dentry_upper(dentry) &&
-		    ovl_dentry_has_upper_alias(dentry))
+		    (ovl_dentry_has_upper_alias(dentry) || disconnected))
 			break;
 
 		next = dget(dentry);
 		/* find the topmost dentry not yet copied up */
-		for (;;) {
+		for (; !disconnected;) {
 			parent = dget_parent(next);
 
 			if (ovl_dentry_upper(parent))
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f8f7facb7331..bfd7c766b5cd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -351,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 
 static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
 {
+	/* Copy up of disconnected dentry does not set upper alias */
 	if (ovl_dentry_upper(dentry) &&
-	    ovl_dentry_has_upper_alias(dentry))
+	    (ovl_dentry_has_upper_alias(dentry) ||
+	     (dentry->d_flags & DCACHE_DISCONNECTED)))
 		return false;
 
 	if (special_file(d_inode(dentry)->i_mode))
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index aa2234d52007..68541eb5be8e 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -229,9 +229,10 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
 }
 
 /*
- * For hard links it's possible for ovl_dentry_upper() to return positive, while
- * there's no actual upper alias for the inode.  Copy up code needs to know
- * about the existence of the upper alias, so it can't use ovl_dentry_upper().
+ * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
+ * to return positive, while there's no actual upper alias for the inode.
+ * Copy up code needs to know about the existence of the upper alias, so it
+ * can't use ovl_dentry_upper().
  */
 bool ovl_dentry_has_upper_alias(struct dentry *dentry)
 {
-- 
cgit 


From c62520a83bceae0bb0b7b3de10c3e81205cd3823 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 14 Jan 2018 19:25:31 +0200
Subject: ovl: store 'has_upper' and 'opaque' as bit flags

We need to make some room in struct ovl_entry to store information
about redirected ancestors for NFS export, so cram two booleans as
bit flags.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c     |  7 ++++---
 fs/overlayfs/overlayfs.h | 10 +++++++++-
 fs/overlayfs/ovl_entry.h |  8 ++++++--
 fs/overlayfs/super.c     |  6 +++---
 fs/overlayfs/util.c      | 30 +++++++++++++++++++-----------
 5 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 9d3ccbd95dde..ca15893cfaa9 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -957,10 +957,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	if (!oe)
 		goto out_put;
 
-	oe->opaque = upperopaque;
 	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
 	dentry->d_fsdata = oe;
 
+	if (upperopaque)
+		ovl_dentry_set_opaque(dentry);
+
 	if (upperdentry)
 		ovl_dentry_set_upper_alias(dentry);
 	else if (index)
@@ -1003,7 +1005,6 @@ out:
 
 bool ovl_lower_positive(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
 	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
 	const struct qstr *name = &dentry->d_name;
 	const struct cred *old_cred;
@@ -1016,7 +1017,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 	 * whiteout.
 	 */
 	if (!dentry->d_inode)
-		return oe->opaque;
+		return ovl_dentry_is_opaque(dentry);
 
 	/* Negative upper -> positive lower */
 	if (!ovl_dentry_upper(dentry))
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f9fce7a680cd..2dddcd257eb3 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -29,7 +29,7 @@ enum ovl_path_type {
 #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
 #define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
 
-enum ovl_flag {
+enum ovl_inode_flag {
 	/* Pure upper dir that may contain non pure upper entries */
 	OVL_IMPURE,
 	/* Non-merge dir that may contain whiteout entries */
@@ -37,6 +37,11 @@ enum ovl_flag {
 	OVL_INDEX,
 };
 
+enum ovl_entry_flag {
+	OVL_E_UPPER_ALIAS,
+	OVL_E_OPAQUE,
+};
+
 /*
  * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
  * where:
@@ -213,6 +218,9 @@ struct inode *ovl_inode_lower(struct inode *inode);
 struct inode *ovl_inode_real(struct inode *inode);
 struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
 void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 6dd60fcf8cb7..bfef6edcc111 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -61,8 +61,7 @@ struct ovl_fs {
 struct ovl_entry {
 	union {
 		struct {
-			unsigned long has_upper;
-			bool opaque;
+			unsigned long flags;
 		};
 		struct rcu_head rcu;
 	};
@@ -72,6 +71,11 @@ struct ovl_entry {
 
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
+static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+{
+	return (struct ovl_entry *) dentry->d_fsdata;
+}
+
 struct ovl_inode {
 	struct ovl_dir_cache *cache;
 	const char *redirect;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 170c184a9f43..fccdcfae68e9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1345,15 +1345,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!root_dentry)
 		goto out_free_oe;
 
+	root_dentry->d_fsdata = oe;
+
 	mntput(upperpath.mnt);
 	if (upperpath.dentry) {
-		oe->has_upper = true;
+		ovl_dentry_set_upper_alias(root_dentry);
 		if (ovl_is_impuredir(upperpath.dentry))
 			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
 	}
 
-	root_dentry->d_fsdata = oe;
-
 	/* Root is always merge -> can have whiteouts */
 	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
 	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 68541eb5be8e..930784a26623 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -210,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
 	OVL_I(inode)->cache = cache;
 }
 
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
+{
+	set_bit(flag, &OVL_E(dentry)->flags);
+}
+
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
+{
+	clear_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
+{
+	return test_bit(flag, &OVL_E(dentry)->flags);
+}
+
 bool ovl_dentry_is_opaque(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-	return oe->opaque;
+	return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
 }
 
 bool ovl_dentry_is_whiteout(struct dentry *dentry)
@@ -223,9 +237,7 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry)
 
 void ovl_dentry_set_opaque(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	oe->opaque = true;
+	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
 }
 
 /*
@@ -236,16 +248,12 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
  */
 bool ovl_dentry_has_upper_alias(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->has_upper;
+	return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
 }
 
 void ovl_dentry_set_upper_alias(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	oe->has_upper = true;
+	ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
 }
 
 bool ovl_redirect_dir(struct super_block *sb)
-- 
cgit 


From f9c34674bc60e5fc0af6ec6513517ed9182862b9 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 19 Jan 2018 11:39:52 +0100
Subject: vfs: factor out helpers d_instantiate_anon() and d_alloc_anon()

Those helpers are going to be used by overlayfs to implement
NFS export decode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/dcache.c | 87 +++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index b5d5ea984ac4..99bce0ed0213 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1699,9 +1699,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_anon(struct super_block *sb)
+{
+	return __d_alloc(sb, NULL);
+}
+EXPORT_SYMBOL(d_alloc_anon);
+
 struct dentry *d_alloc_cursor(struct dentry * parent)
 {
-	struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+	struct dentry *dentry = d_alloc_anon(parent->d_sb);
 	if (dentry) {
 		dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
 		dentry->d_parent = dget(parent);
@@ -1887,7 +1893,7 @@ struct dentry *d_make_root(struct inode *root_inode)
 	struct dentry *res = NULL;
 
 	if (root_inode) {
-		res = __d_alloc(root_inode->i_sb, NULL);
+		res = d_alloc_anon(root_inode->i_sb);
 		if (res)
 			d_instantiate(res, root_inode);
 		else
@@ -1926,33 +1932,19 @@ struct dentry *d_find_any_alias(struct inode *inode)
 }
 EXPORT_SYMBOL(d_find_any_alias);
 
-static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
+static struct dentry *__d_instantiate_anon(struct dentry *dentry,
+					   struct inode *inode,
+					   bool disconnected)
 {
-	struct dentry *tmp;
 	struct dentry *res;
 	unsigned add_flags;
 
-	if (!inode)
-		return ERR_PTR(-ESTALE);
-	if (IS_ERR(inode))
-		return ERR_CAST(inode);
-
-	res = d_find_any_alias(inode);
-	if (res)
-		goto out_iput;
-
-	tmp = __d_alloc(inode->i_sb, NULL);
-	if (!tmp) {
-		res = ERR_PTR(-ENOMEM);
-		goto out_iput;
-	}
-
-	security_d_instantiate(tmp, inode);
+	security_d_instantiate(dentry, inode);
 	spin_lock(&inode->i_lock);
 	res = __d_find_any_alias(inode);
 	if (res) {
 		spin_unlock(&inode->i_lock);
-		dput(tmp);
+		dput(dentry);
 		goto out_iput;
 	}
 
@@ -1962,22 +1954,55 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
 	if (disconnected)
 		add_flags |= DCACHE_DISCONNECTED;
 
-	spin_lock(&tmp->d_lock);
-	__d_set_inode_and_type(tmp, inode, add_flags);
-	hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
-	hlist_bl_lock(&tmp->d_sb->s_anon);
-	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
-	hlist_bl_unlock(&tmp->d_sb->s_anon);
-	spin_unlock(&tmp->d_lock);
+	spin_lock(&dentry->d_lock);
+	__d_set_inode_and_type(dentry, inode, add_flags);
+	hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+	hlist_bl_lock(&dentry->d_sb->s_anon);
+	hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_anon);
+	hlist_bl_unlock(&dentry->d_sb->s_anon);
+	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
 
-	return tmp;
+	return dentry;
 
  out_iput:
 	iput(inode);
 	return res;
 }
 
+struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
+{
+	return __d_instantiate_anon(dentry, inode, true);
+}
+EXPORT_SYMBOL(d_instantiate_anon);
+
+static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
+{
+	struct dentry *tmp;
+	struct dentry *res;
+
+	if (!inode)
+		return ERR_PTR(-ESTALE);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	res = d_find_any_alias(inode);
+	if (res)
+		goto out_iput;
+
+	tmp = d_alloc_anon(inode->i_sb);
+	if (!tmp) {
+		res = ERR_PTR(-ENOMEM);
+		goto out_iput;
+	}
+
+	return __d_instantiate_anon(tmp, inode, disconnected);
+
+out_iput:
+	iput(inode);
+	return res;
+}
+
 /**
  * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
  * @inode: inode to allocate the dentry for
@@ -1998,7 +2023,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
  */
 struct dentry *d_obtain_alias(struct inode *inode)
 {
-	return __d_obtain_alias(inode, 1);
+	return __d_obtain_alias(inode, true);
 }
 EXPORT_SYMBOL(d_obtain_alias);
 
@@ -2019,7 +2044,7 @@ EXPORT_SYMBOL(d_obtain_alias);
  */
 struct dentry *d_obtain_root(struct inode *inode)
 {
-	return __d_obtain_alias(inode, 0);
+	return __d_obtain_alias(inode, false);
 }
 EXPORT_SYMBOL(d_obtain_root);
 
-- 
cgit 


From 8ed5eec9d6c4c013aa657ebefbd10a1a0d15893d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 12 Jul 2017 14:17:16 +0300
Subject: ovl: encode pure upper file handles

Encode overlay file handles as struct ovl_fh containing the file handle
encoding of the real upper inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/Makefile    |  3 +-
 fs/overlayfs/export.c    | 98 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |  6 +++
 3 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 fs/overlayfs/export.c

(limited to 'fs')

diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 99373bbc1478..30802347a020 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,4 +4,5 @@
 
 obj-$(CONFIG_OVERLAY_FS) += overlay.o
 
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o
+overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
+		export.o
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
new file mode 100644
index 000000000000..67b907ca9cdc
--- /dev/null
+++ b/fs/overlayfs/export.c
@@ -0,0 +1,98 @@
+/*
+ * Overlayfs NFS export support.
+ *
+ * Amir Goldstein <amir73il@gmail.com>
+ *
+ * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+{
+	struct dentry *upper = ovl_dentry_upper(dentry);
+	struct dentry *origin = ovl_dentry_lower(dentry);
+	struct ovl_fh *fh = NULL;
+	int err;
+
+	/*
+	 * On overlay with an upper layer, overlay root inode is encoded as
+	 * an upper file handle, because upper root dir is not indexed.
+	 */
+	if (dentry == dentry->d_sb->s_root && upper)
+		origin = NULL;
+
+	err = -EACCES;
+	if (!upper || origin)
+		goto fail;
+
+	/* TODO: encode non pure-upper by origin */
+	fh = ovl_encode_fh(upper, true);
+
+	err = -EOVERFLOW;
+	if (fh->len > buflen)
+		goto fail;
+
+	memcpy(buf, (char *)fh, fh->len);
+	err = fh->len;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+			    dentry, err, buflen, fh ? (int)fh->len : 0,
+			    fh ? fh->type : 0);
+	goto out;
+}
+
+static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
+{
+	int res, len = *max_len << 2;
+
+	res = ovl_d_to_fh(dentry, (char *)fid, len);
+	if (res <= 0)
+		return FILEID_INVALID;
+
+	len = res;
+
+	/* Round up to dwords */
+	*max_len = (len + 3) >> 2;
+	return OVL_FILEID;
+}
+
+static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
+			       struct inode *parent)
+{
+	struct dentry *dentry;
+	int type;
+
+	/* TODO: encode connectable file handles */
+	if (parent)
+		return FILEID_INVALID;
+
+	dentry = d_find_any_alias(inode);
+	if (WARN_ON(!dentry))
+		return FILEID_INVALID;
+
+	type = ovl_dentry_to_fh(dentry, fid, max_len);
+
+	dput(dentry);
+	return type;
+}
+
+const struct export_operations ovl_export_operations = {
+	.encode_fh	= ovl_encode_inode_fh,
+};
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 2dddcd257eb3..f2baa2ccaacd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -68,6 +68,9 @@ enum ovl_entry_flag {
 #error Endianness not defined
 #endif
 
+/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
+#define OVL_FILEID	0xfb
+
 /* On-disk and in-memeory format for redirect by file handle */
 struct ovl_fh {
 	u8 version;	/* 0 */
@@ -351,3 +354,6 @@ int ovl_set_attr(struct dentry *upper, struct kstat *stat);
 struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
 int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 		   struct dentry *upper);
+
+/* export.c */
+extern const struct export_operations ovl_export_operations;
-- 
cgit 


From 8556a4205b111c4dac931ee5eba4fcce74c3cb21 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 19 Jan 2018 01:03:23 +0200
Subject: ovl: decode pure upper file handles

Decoding an upper file handle is done by decoding the upper dentry from
underlying upper fs, finding or allocating an overlay inode that is
hashed by the real upper inode and instantiating an overlay dentry with
that inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/namei.c     |  4 +-
 fs/overlayfs/overlayfs.h |  2 +
 3 files changed, 101 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 67b907ca9cdc..a7d57bf9c9d8 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -93,6 +93,103 @@ static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
 	return type;
 }
 
+/*
+ * Find or instantiate an overlay dentry from real dentries.
+ */
+static struct dentry *ovl_obtain_alias(struct super_block *sb,
+				       struct dentry *upper,
+				       struct ovl_path *lowerpath)
+{
+	struct inode *inode;
+	struct dentry *dentry;
+	struct ovl_entry *oe;
+	void *fsdata = &oe;
+
+	/* TODO: obtain non pure-upper */
+	if (lowerpath)
+		return ERR_PTR(-EIO);
+
+	inode = ovl_get_inode(sb, dget(upper), NULL, NULL, 0);
+	if (IS_ERR(inode)) {
+		dput(upper);
+		return ERR_CAST(inode);
+	}
+
+	dentry = d_find_any_alias(inode);
+	if (!dentry) {
+		dentry = d_alloc_anon(inode->i_sb);
+		if (!dentry)
+			goto nomem;
+		oe = ovl_alloc_entry(0);
+		if (!oe)
+			goto nomem;
+
+		dentry->d_fsdata = oe;
+		ovl_dentry_set_upper_alias(dentry);
+	}
+
+	return d_instantiate_anon(dentry, inode);
+
+nomem:
+	iput(inode);
+	dput(dentry);
+	return ERR_PTR(-ENOMEM);
+}
+
+static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct dentry *dentry;
+	struct dentry *upper;
+
+	if (!ofs->upper_mnt)
+		return ERR_PTR(-EACCES);
+
+	upper = ovl_decode_fh(fh, ofs->upper_mnt);
+	if (IS_ERR_OR_NULL(upper))
+		return upper;
+
+	dentry = ovl_obtain_alias(sb, upper, NULL);
+	dput(upper);
+
+	return dentry;
+}
+
+static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	struct dentry *dentry = NULL;
+	struct ovl_fh *fh = (struct ovl_fh *) fid;
+	int len = fh_len << 2;
+	unsigned int flags = 0;
+	int err;
+
+	err = -EINVAL;
+	if (fh_type != OVL_FILEID)
+		goto out_err;
+
+	err = ovl_check_fh_len(fh, len);
+	if (err)
+		goto out_err;
+
+	/* TODO: decode non-upper */
+	flags = fh->flags;
+	if (flags & OVL_FH_FLAG_PATH_UPPER)
+		dentry = ovl_upper_fh_to_d(sb, fh);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry) && err != -ESTALE)
+		goto out_err;
+
+	return dentry;
+
+out_err:
+	pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+			    len, fh_type, flags, err);
+	return ERR_PTR(err);
+}
+
 const struct export_operations ovl_export_operations = {
 	.encode_fh	= ovl_encode_inode_fh,
+	.fh_to_dentry	= ovl_fh_to_dentry,
 };
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index ca15893cfaa9..a35c5eaa2c01 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -107,7 +107,7 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
  * Return -ENODATA for "origin unknown".
  * Return <0 for an invalid file handle.
  */
-static int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
 {
 	if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
 		return -EINVAL;
@@ -171,7 +171,7 @@ invalid:
 	goto out;
 }
 
-static struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
+struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
 {
 	struct dentry *real;
 	int bytes;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f2baa2ccaacd..401113a2e9c7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -264,6 +264,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 
 
 /* namei.c */
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 		      struct dentry *real, bool is_upper, bool set);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
-- 
cgit 


From 3985b70a3e3f58109dc6ae347eafe6e8610be41e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 28 Dec 2017 18:36:16 +0200
Subject: ovl: decode connected upper dir file handles

Until this change, we decoded upper file handles by instantiating an
overlay dentry from the real upper dentry. This is sufficient to handle
pure upper files, but insufficient to handle merge/impure dirs.

To that end, if decoded real upper dir is connected and hashed, we
lookup an overlay dentry with the same path as the real upper dir.
If decoded real upper is non-dir, we instantiate a disconnected overlay
dentry as before this change.

Because ovl_fh_to_dentry() returns a connected overlay dir dentry,
exportfs never needs to call get_parent() and get_name() to reconnect an
upper overlay dir. Because connectable non-dir file handles are not
supported, exportfs will not be able to use fh_to_parent() and get_name()
methods to reconnect a disconnected non-dir to its parent. Therefore, the
methods get_parent() and get_name() are implemented just to print out a
sanity warning and the method fh_to_parent() is implemented to warn the
user that using the 'subtree_check' exportfs option is not supported.

An alternative approach could have been to implement instantiating of
an overlay directory inode from origin/index and implement get_parent()
and get_name() by calling into underlying fs operations and then
instantiating the overlay parent dir.

The reasons for not choosing the get_parent() approach were:
- Obtaining a disconnected overlay dir dentry would require a
  delicate re-factoring of ovl_lookup() to get a dentry with overlay
  parent info. It was preferred to avoid doing that re-factoring unless
  it was proven worthy.
- Going down the path of disconnected dir would mean that the (non
  trivial) code path of d_splice_alias() could be traveled and that
  meant writing more tests and introducing race cases that are very hard
  to hit on purpose. Taking the path of connecting overlay dentry by
  forward lookup is therefore the safe and boring way to avoid surprises.

The drawbacks of the chosen "connected overlay dentry" approach:
- We need to take special care of renames of ancestors while connecting
  the overlay dentry by real dentry path. These subtleties are usually
  handled by generic exportfs and VFS code.
- In a hypothetical workload, we could end up in a loop trying to connect,
  interrupted by rename and restarting connect forever.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 230 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index a7d57bf9c9d8..09fbfa83eeff 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -136,6 +136,204 @@ nomem:
 	return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * Lookup a child overlay dentry to get a connected overlay dentry whose real
+ * dentry is @real. If @real is on upper layer, we lookup a child overlay
+ * dentry with the same name as the real dentry. Otherwise, we need to consult
+ * index for lookup.
+ */
+static struct dentry *ovl_lookup_real_one(struct dentry *connected,
+					  struct dentry *real,
+					  struct ovl_layer *layer)
+{
+	struct inode *dir = d_inode(connected);
+	struct dentry *this, *parent = NULL;
+	struct name_snapshot name;
+	int err;
+
+	/* TODO: lookup by lower real dentry */
+	if (layer->idx)
+		return ERR_PTR(-EACCES);
+
+	/*
+	 * Lookup child overlay dentry by real name. The dir mutex protects us
+	 * from racing with overlay rename. If the overlay dentry that is above
+	 * real has already been moved to a parent that is not under the
+	 * connected overlay dir, we return -ECHILD and restart the lookup of
+	 * connected real path from the top.
+	 */
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	err = -ECHILD;
+	parent = dget_parent(real);
+	if (ovl_dentry_upper(connected) != parent)
+		goto fail;
+
+	/*
+	 * We also need to take a snapshot of real dentry name to protect us
+	 * from racing with underlying layer rename. In this case, we don't
+	 * care about returning ESTALE, only from dereferencing a free name
+	 * pointer because we hold no lock on the real dentry.
+	 */
+	take_dentry_name_snapshot(&name, real);
+	this = lookup_one_len(name.name, connected, strlen(name.name));
+	err = PTR_ERR(this);
+	if (IS_ERR(this)) {
+		goto fail;
+	} else if (!this || !this->d_inode) {
+		dput(this);
+		err = -ENOENT;
+		goto fail;
+	} else if (ovl_dentry_upper(this) != real) {
+		dput(this);
+		err = -ESTALE;
+		goto fail;
+	}
+
+out:
+	release_dentry_name_snapshot(&name);
+	dput(parent);
+	inode_unlock(dir);
+	return this;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	this = ERR_PTR(err);
+	goto out;
+}
+
+/*
+ * Lookup a connected overlay dentry whose real dentry is @real.
+ * If @real is on upper layer, we lookup a child overlay dentry with the same
+ * path the real dentry. Otherwise, we need to consult index for lookup.
+ */
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      struct ovl_layer *layer)
+{
+	struct dentry *connected;
+	int err = 0;
+
+	/* TODO: use index when looking up by lower real dentry */
+	if (layer->idx)
+		return ERR_PTR(-EACCES);
+
+	connected = dget(sb->s_root);
+	while (!err) {
+		struct dentry *next, *this;
+		struct dentry *parent = NULL;
+		struct dentry *real_connected = ovl_dentry_upper(connected);
+
+		if (real_connected == real)
+			break;
+
+		/* Find the topmost dentry not yet connected */
+		next = dget(real);
+		for (;;) {
+			parent = dget_parent(next);
+
+			if (parent == real_connected)
+				break;
+
+			/*
+			 * If real has been moved out of 'real_connected',
+			 * we will not find 'real_connected' and hit the layer
+			 * root. In that case, we need to restart connecting.
+			 * This game can go on forever in the worst case. We
+			 * may want to consider taking s_vfs_rename_mutex if
+			 * this happens more than once.
+			 */
+			if (parent == layer->mnt->mnt_root) {
+				dput(connected);
+				connected = dget(sb->s_root);
+				break;
+			}
+
+			/*
+			 * If real file has been moved out of the layer root
+			 * directory, we will eventully hit the real fs root.
+			 * This cannot happen by legit overlay rename, so we
+			 * return error in that case.
+			 */
+			if (parent == next) {
+				err = -EXDEV;
+				break;
+			}
+
+			dput(next);
+			next = parent;
+		}
+
+		if (!err) {
+			this = ovl_lookup_real_one(connected, next, layer);
+			if (IS_ERR(this))
+				err = PTR_ERR(this);
+
+			/*
+			 * Lookup of child in overlay can fail when racing with
+			 * overlay rename of child away from 'connected' parent.
+			 * In this case, we need to restart the lookup from the
+			 * top, because we cannot trust that 'real_connected' is
+			 * still an ancestor of 'real'.
+			 */
+			if (err == -ECHILD) {
+				this = dget(sb->s_root);
+				err = 0;
+			}
+			if (!err) {
+				dput(connected);
+				connected = this;
+			}
+		}
+
+		dput(parent);
+		dput(next);
+	}
+
+	if (err)
+		goto fail;
+
+	return connected;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+			    real, layer->idx, connected, err);
+	dput(connected);
+	return ERR_PTR(err);
+}
+
+/*
+ * Get an overlay dentry from upper/lower real dentries.
+ */
+static struct dentry *ovl_get_dentry(struct super_block *sb,
+				     struct dentry *upper,
+				     struct ovl_path *lowerpath)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+
+	/* TODO: get non-upper dentry */
+	if (!upper)
+		return ERR_PTR(-EACCES);
+
+	/*
+	 * Obtain a disconnected overlay dentry from a non-dir real upper
+	 * dentry.
+	 */
+	if (!d_is_dir(upper))
+		return ovl_obtain_alias(sb, upper, NULL);
+
+	/* Removed empty directory? */
+	if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper))
+		return ERR_PTR(-ENOENT);
+
+	/*
+	 * If real upper dentry is connected and hashed, get a connected
+	 * overlay dentry with the same path as the real upper dentry.
+	 */
+	return ovl_lookup_real(sb, upper, &upper_layer);
+}
+
 static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 					struct ovl_fh *fh)
 {
@@ -150,7 +348,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	if (IS_ERR_OR_NULL(upper))
 		return upper;
 
-	dentry = ovl_obtain_alias(sb, upper, NULL);
+	dentry = ovl_get_dentry(sb, upper, NULL);
 	dput(upper);
 
 	return dentry;
@@ -189,7 +387,38 @@ out_err:
 	return ERR_PTR(err);
 }
 
+static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
+				       int fh_len, int fh_type)
+{
+	pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+	return ERR_PTR(-EACCES);
+}
+
+static int ovl_get_name(struct dentry *parent, char *name,
+			struct dentry *child)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries and
+	 * ovl_fh_to_parent() is not implemented, so we should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return -EIO;
+}
+
+static struct dentry *ovl_get_parent(struct dentry *dentry)
+{
+	/*
+	 * ovl_fh_to_dentry() returns connected dir overlay dentries, so we
+	 * should not get here.
+	 */
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-EIO);
+}
+
 const struct export_operations ovl_export_operations = {
 	.encode_fh	= ovl_encode_inode_fh,
 	.fh_to_dentry	= ovl_fh_to_dentry,
+	.fh_to_parent	= ovl_fh_to_parent,
+	.get_name	= ovl_get_name,
+	.get_parent	= ovl_get_parent,
 };
-- 
cgit 


From b305e8443f3a87e794927085106db7ebc99a4f74 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Jan 2018 13:14:55 +0200
Subject: ovl: encode non-indexed upper file handles

We only need to encode origin if there is a chance that the same object was
encoded pre copy up and then we need to stay consistent with the same
encoding also after copy up.

In case a non-pure upper is not indexed, then it was copied up before NFS
export support was enabled. In that case, we don't need to worry about
staying consistent with pre copy up encoding and we encode an upper file
handle.

This mitigates the problem that with no index, we cannot find an upper
inode from origin inode, so we cannot decode a non-indexed upper from
origin file handle.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 09fbfa83eeff..862c368883c9 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -19,6 +19,40 @@
 #include <linux/ratelimit.h>
 #include "overlayfs.h"
 
+/*
+ * We only need to encode origin if there is a chance that the same object was
+ * encoded pre copy up and then we need to stay consistent with the same
+ * encoding also after copy up. If non-pure upper is not indexed, then it was
+ * copied up before NFS export was enabled. In that case we don't need to worry
+ * about staying consistent with pre copy up encoding and we encode an upper
+ * file handle. Overlay root dentry is a private case of non-indexed upper.
+ *
+ * The following table summarizes the different file handle encodings used for
+ * different overlay object types:
+ *
+ *  Object type		| Encoding
+ * --------------------------------
+ *  Pure upper		| U
+ *  Non-indexed upper	| U
+ *  Indexed upper	| L
+ *  Non-upper		| L
+ *
+ * U = upper file handle
+ * L = lower file handle
+ */
+static bool ovl_should_encode_origin(struct dentry *dentry)
+{
+	if (!ovl_dentry_lower(dentry))
+		return false;
+
+	/* Decoding a non-indexed upper from origin is not implemented */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return false;
+
+	return true;
+}
+
 static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 {
 	struct dentry *upper = ovl_dentry_upper(dentry);
@@ -26,11 +60,7 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 	struct ovl_fh *fh = NULL;
 	int err;
 
-	/*
-	 * On overlay with an upper layer, overlay root inode is encoded as
-	 * an upper file handle, because upper root dir is not indexed.
-	 */
-	if (dentry == dentry->d_sb->s_root && upper)
+	if (!ovl_should_encode_origin(dentry))
 		origin = NULL;
 
 	err = -EACCES;
-- 
cgit 


From 05e1f11816d7952ef26cc37fdd6637f834d675a9 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Jan 2018 13:15:26 +0200
Subject: ovl: copy up before encoding non-connectable dir file handle

Decoding a merge dir, whose origin's parent is under a redirected
lower dir is not always possible. As a simple approximation, we do
not encode lower dir file handles when overlay has multiple lower
layers and origin is below the topmost lower layer.

We should later relax this condition and copy up only the parent
that is under a redirected lower.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 862c368883c9..9da498ea75db 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -34,17 +34,35 @@
  * --------------------------------
  *  Pure upper		| U
  *  Non-indexed upper	| U
- *  Indexed upper	| L
- *  Non-upper		| L
+ *  Indexed upper	| L (*)
+ *  Non-upper		| L (*)
  *
  * U = upper file handle
  * L = lower file handle
+ *
+ * (*) Connecting an overlay dir from real lower dentry is not always
+ * possible when there are redirects in lower layers. To mitigate this case,
+ * we copy up the lower dir first and then encode an upper dir file handle.
  */
 static bool ovl_should_encode_origin(struct dentry *dentry)
 {
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
 	if (!ovl_dentry_lower(dentry))
 		return false;
 
+	/*
+	 * Decoding a merge dir, whose origin's parent is under a redirected
+	 * lower dir is not always possible. As a simple approximation, we do
+	 * not encode lower dir file handles when overlay has multiple lower
+	 * layers and origin is below the topmost lower layer.
+	 *
+	 * TODO: copy up only the parent that is under redirected lower.
+	 */
+	if (d_is_dir(dentry) && ofs->upper_mnt &&
+	    OVL_E(dentry)->lowerstack[0].layer->idx > 1)
+		return false;
+
 	/* Decoding a non-indexed upper from origin is not implemented */
 	if (ovl_dentry_upper(dentry) &&
 	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
@@ -53,16 +71,43 @@ static bool ovl_should_encode_origin(struct dentry *dentry)
 	return true;
 }
 
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+	int err;
+
+	if (ovl_dentry_upper(dentry))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		return err;
+
+	err = ovl_copy_up(dentry);
+
+	ovl_drop_write(dentry);
+	return err;
+}
+
 static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 {
-	struct dentry *upper = ovl_dentry_upper(dentry);
+	struct dentry *upper;
 	struct dentry *origin = ovl_dentry_lower(dentry);
 	struct ovl_fh *fh = NULL;
 	int err;
 
-	if (!ovl_should_encode_origin(dentry))
+	/*
+	 * If we should not encode a lower dir file handle, copy up and encode
+	 * an upper dir file handle.
+	 */
+	if (!ovl_should_encode_origin(dentry)) {
+		err = ovl_encode_maybe_copy_up(dentry);
+		if (err)
+			goto fail;
+
 		origin = NULL;
+	}
 
+	upper = ovl_dentry_upper(dentry);
 	err = -EACCES;
 	if (!upper || origin)
 		goto fail;
-- 
cgit 


From 03e1c584ffbcb4ce05c3c61b76aceab4c12d6b68 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 28 Dec 2017 19:35:21 +0200
Subject: ovl: encode lower file handles

For indexed or lower non-dir, encode a non-connectable lower file handle
from origin inode. For indexed or lower dir, when ofs->numlower == 1,
encode a lower file handle from lower dir.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 9da498ea75db..8e37a07b9eff 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -90,7 +90,6 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
 
 static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 {
-	struct dentry *upper;
 	struct dentry *origin = ovl_dentry_lower(dentry);
 	struct ovl_fh *fh = NULL;
 	int err;
@@ -107,13 +106,8 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 		origin = NULL;
 	}
 
-	upper = ovl_dentry_upper(dentry);
-	err = -EACCES;
-	if (!upper || origin)
-		goto fail;
-
-	/* TODO: encode non pure-upper by origin */
-	fh = ovl_encode_fh(upper, true);
+	/* Encode an upper or origin file handle */
+	fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
 
 	err = -EOVERFLOW;
 	if (fh->len > buflen)
-- 
cgit 


From f941866fc4a8ad0d0b861cc2dbffa06a9f5e8963 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 19 Jan 2018 21:33:44 +0200
Subject: ovl: decode lower non-dir file handles

Decoding a lower non-dir file handle is done by decoding the lower dentry
from underlying lower fs, finding or allocating an overlay inode that is
hashed by the real lower inode and instantiating an overlay dentry with
that inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 53 ++++++++++++++++++++++++++++++++++++++----------
 fs/overlayfs/namei.c     |  7 +++----
 fs/overlayfs/overlayfs.h |  2 ++
 3 files changed, 47 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 8e37a07b9eff..8c0172d9b922 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -169,16 +169,16 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
 				       struct dentry *upper,
 				       struct ovl_path *lowerpath)
 {
-	struct inode *inode;
+	struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
 	struct dentry *dentry;
+	struct inode *inode;
 	struct ovl_entry *oe;
-	void *fsdata = &oe;
 
-	/* TODO: obtain non pure-upper */
-	if (lowerpath)
+	/* TODO: obtain an indexed non-dir upper with origin */
+	if (lower && (upper || d_is_dir(lower)))
 		return ERR_PTR(-EIO);
 
-	inode = ovl_get_inode(sb, dget(upper), NULL, NULL, 0);
+	inode = ovl_get_inode(sb, dget(upper), lower, NULL, !!lower);
 	if (IS_ERR(inode)) {
 		dput(upper);
 		return ERR_CAST(inode);
@@ -189,12 +189,17 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
 		dentry = d_alloc_anon(inode->i_sb);
 		if (!dentry)
 			goto nomem;
-		oe = ovl_alloc_entry(0);
+		oe = ovl_alloc_entry(lower ? 1 : 0);
 		if (!oe)
 			goto nomem;
 
+		if (lower) {
+			oe->lowerstack->dentry = dget(lower);
+			oe->lowerstack->layer = lowerpath->layer;
+		}
 		dentry->d_fsdata = oe;
-		ovl_dentry_set_upper_alias(dentry);
+		if (upper)
+			ovl_dentry_set_upper_alias(dentry);
 	}
 
 	return d_instantiate_anon(dentry, inode);
@@ -381,7 +386,14 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
 	struct ovl_fs *ofs = sb->s_fs_info;
 	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
 
-	/* TODO: get non-upper dentry */
+	/*
+	 * Obtain a disconnected overlay dentry from a disconnected non-dir
+	 * real lower dentry.
+	 */
+	if (!upper && !d_is_dir(lowerpath->dentry))
+		return ovl_obtain_alias(sb, NULL, lowerpath);
+
+	/* TODO: lookup connected dir from real lower dir */
 	if (!upper)
 		return ERR_PTR(-EACCES);
 
@@ -423,6 +435,25 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	return dentry;
 }
 
+static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
+					struct ovl_fh *fh)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_path origin = { };
+	struct ovl_path *stack = &origin;
+	struct dentry *dentry = NULL;
+	int err;
+
+	err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
+	if (err)
+		return ERR_PTR(err);
+
+	dentry = ovl_get_dentry(sb, NULL, &origin);
+	dput(origin.dentry);
+
+	return dentry;
+}
+
 static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
 				       int fh_len, int fh_type)
 {
@@ -440,10 +471,10 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	if (err)
 		goto out_err;
 
-	/* TODO: decode non-upper */
 	flags = fh->flags;
-	if (flags & OVL_FH_FLAG_PATH_UPPER)
-		dentry = ovl_upper_fh_to_d(sb, fh);
+	dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
+		 ovl_upper_fh_to_d(sb, fh) :
+		 ovl_lower_fh_to_d(sb, fh);
 	err = PTR_ERR(dentry);
 	if (IS_ERR(dentry) && err != -ESTALE)
 		goto out_err;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a35c5eaa2c01..741a42d974a3 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -310,9 +310,8 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
-			       struct dentry *upperdentry,
-			       struct ovl_path **stackp)
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+			struct dentry *upperdentry, struct ovl_path **stackp)
 {
 	struct dentry *origin = NULL;
 	int i;
@@ -328,7 +327,7 @@ static int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
 	else if (IS_ERR(origin))
 		return PTR_ERR(origin);
 
-	if (!ovl_is_whiteout(upperdentry) &&
+	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
 	    ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
 		goto invalid;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 401113a2e9c7..40ba11e412b1 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -266,6 +266,8 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 /* namei.c */
 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
 struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+			struct dentry *upperdentry, struct ovl_path **stackp);
 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 		      struct dentry *real, bool is_upper, bool set);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
-- 
cgit 


From f71bd9cfb692ec80236b186419bf907eb5fa348c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 19 Jan 2018 21:36:20 +0200
Subject: ovl: decode indexed non-dir file handles

Decoding an indexed non-dir file handle is similar to decoding a lower
non-dir file handle, but additionally, we lookup the file handle in index
dir by name to find the real upper inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 71 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 8c0172d9b922..f475a10eec07 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -163,27 +163,32 @@ static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
 }
 
 /*
- * Find or instantiate an overlay dentry from real dentries.
+ * Find or instantiate an overlay dentry from real dentries and index.
  */
 static struct dentry *ovl_obtain_alias(struct super_block *sb,
-				       struct dentry *upper,
-				       struct ovl_path *lowerpath)
+				       struct dentry *upper_alias,
+				       struct ovl_path *lowerpath,
+				       struct dentry *index)
 {
 	struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
+	struct dentry *upper = upper_alias ?: index;
 	struct dentry *dentry;
 	struct inode *inode;
 	struct ovl_entry *oe;
 
-	/* TODO: obtain an indexed non-dir upper with origin */
-	if (lower && (upper || d_is_dir(lower)))
+	/* We get overlay directory dentries with ovl_lookup_real() */
+	if (d_is_dir(upper ?: lower))
 		return ERR_PTR(-EIO);
 
-	inode = ovl_get_inode(sb, dget(upper), lower, NULL, !!lower);
+	inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower);
 	if (IS_ERR(inode)) {
 		dput(upper);
 		return ERR_CAST(inode);
 	}
 
+	if (index)
+		ovl_set_flag(OVL_INDEX, inode);
+
 	dentry = d_find_any_alias(inode);
 	if (!dentry) {
 		dentry = d_alloc_anon(inode->i_sb);
@@ -198,7 +203,7 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
 			oe->lowerstack->layer = lowerpath->layer;
 		}
 		dentry->d_fsdata = oe;
-		if (upper)
+		if (upper_alias)
 			ovl_dentry_set_upper_alias(dentry);
 	}
 
@@ -377,33 +382,28 @@ fail:
 }
 
 /*
- * Get an overlay dentry from upper/lower real dentries.
+ * Get an overlay dentry from upper/lower real dentries and index.
  */
 static struct dentry *ovl_get_dentry(struct super_block *sb,
 				     struct dentry *upper,
-				     struct ovl_path *lowerpath)
+				     struct ovl_path *lowerpath,
+				     struct dentry *index)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+	struct dentry *real = upper ?: (index ?: lowerpath->dentry);
 
 	/*
-	 * Obtain a disconnected overlay dentry from a disconnected non-dir
-	 * real lower dentry.
+	 * Obtain a disconnected overlay dentry from a non-dir real dentry
+	 * and index.
 	 */
-	if (!upper && !d_is_dir(lowerpath->dentry))
-		return ovl_obtain_alias(sb, NULL, lowerpath);
+	if (!d_is_dir(real))
+		return ovl_obtain_alias(sb, upper, lowerpath, index);
 
 	/* TODO: lookup connected dir from real lower dir */
 	if (!upper)
 		return ERR_PTR(-EACCES);
 
-	/*
-	 * Obtain a disconnected overlay dentry from a non-dir real upper
-	 * dentry.
-	 */
-	if (!d_is_dir(upper))
-		return ovl_obtain_alias(sb, upper, NULL);
-
 	/* Removed empty directory? */
 	if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper))
 		return ERR_PTR(-ENOENT);
@@ -429,7 +429,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	if (IS_ERR_OR_NULL(upper))
 		return upper;
 
-	dentry = ovl_get_dentry(sb, upper, NULL);
+	dentry = ovl_get_dentry(sb, upper, NULL, NULL);
 	dput(upper);
 
 	return dentry;
@@ -442,16 +442,37 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 	struct ovl_path origin = { };
 	struct ovl_path *stack = &origin;
 	struct dentry *dentry = NULL;
+	struct dentry *index = NULL;
 	int err;
 
+	/* First lookup indexed upper by fh */
+	if (ofs->indexdir) {
+		index = ovl_get_index_fh(ofs, fh);
+		err = PTR_ERR(index);
+		if (IS_ERR(index))
+			return ERR_PTR(err);
+	}
+
+	/* Then lookup origin by fh */
 	err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
-	if (err)
-		return ERR_PTR(err);
+	if (err) {
+		goto out_err;
+	} else if (index) {
+		err = ovl_verify_origin(index, origin.dentry, false);
+		if (err)
+			goto out_err;
+	}
 
-	dentry = ovl_get_dentry(sb, NULL, &origin);
-	dput(origin.dentry);
+	dentry = ovl_get_dentry(sb, NULL, &origin, index);
 
+out:
+	dput(origin.dentry);
+	dput(index);
 	return dentry;
+
+out_err:
+	dentry = ERR_PTR(err);
+	goto out;
 }
 
 static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
-- 
cgit 


From 9436a1a339fae84698aaa0b66d7a822018388348 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 24 Dec 2017 18:28:04 +0200
Subject: ovl: decode lower file handles of unlinked but open files

Lookup overlay inode in cache by origin inode, so we can decode a file
handle of an open file even if the index has a whiteout index entry to
mark that this overlay inode was unlinked.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 23 +++++++++++++++++++++--
 fs/overlayfs/inode.c     | 16 ++++++++++++++++
 fs/overlayfs/overlayfs.h |  1 +
 3 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index f475a10eec07..0bca38c79244 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -443,14 +443,22 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 	struct ovl_path *stack = &origin;
 	struct dentry *dentry = NULL;
 	struct dentry *index = NULL;
+	struct inode *inode = NULL;
+	bool is_deleted = false;
 	int err;
 
 	/* First lookup indexed upper by fh */
 	if (ofs->indexdir) {
 		index = ovl_get_index_fh(ofs, fh);
 		err = PTR_ERR(index);
-		if (IS_ERR(index))
-			return ERR_PTR(err);
+		if (IS_ERR(index)) {
+			if (err != -ESTALE)
+				return ERR_PTR(err);
+
+			/* Found a whiteout index - treat as deleted inode */
+			is_deleted = true;
+			index = NULL;
+		}
 	}
 
 	/* Then lookup origin by fh */
@@ -461,6 +469,16 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 		err = ovl_verify_origin(index, origin.dentry, false);
 		if (err)
 			goto out_err;
+	} else if (is_deleted) {
+		/* Lookup deleted non-dir by origin inode */
+		if (!d_is_dir(origin.dentry))
+			inode = ovl_lookup_inode(sb, origin.dentry);
+		err = -ESTALE;
+		if (!inode || atomic_read(&inode->i_count) == 1)
+			goto out_err;
+
+		/* Deleted but still open? */
+		index = dget(ovl_i_dentry_upper(inode));
 	}
 
 	dentry = ovl_get_dentry(sb, NULL, &origin, index);
@@ -468,6 +486,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 out:
 	dput(origin.dentry);
 	dput(index);
+	iput(inode);
 	return dentry;
 
 out_err:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bfd7c766b5cd..56ba015b9f5e 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -645,6 +645,22 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
 	return true;
 }
 
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin)
+{
+	struct inode *inode, *key = d_inode(origin);
+
+	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (!inode)
+		return NULL;
+
+	if (!ovl_verify_inode(inode, origin, NULL)) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	return inode;
+}
+
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 40ba11e412b1..a47f9142b6be 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -322,6 +322,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin);
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower);
-- 
cgit 


From 3b0bfc6ed3c434800e5eacfb6cdbe45c07c270e1 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 24 Dec 2017 18:42:16 +0200
Subject: ovl: decode indexed dir file handles

Decoding an indexed dir file handle is done by looking up the file handle
in index dir by name and then decoding the upper dir from the index origin
file handle. The decoded upper path is used to lookup an overlay dentry of
the same path.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 13 +++++++++++++
 fs/overlayfs/namei.c     |  2 +-
 fs/overlayfs/overlayfs.h |  1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 0bca38c79244..7a4b6a0fd527 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -461,6 +461,19 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 		}
 	}
 
+	/* Then try to get upper dir by index */
+	if (index && d_is_dir(index)) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		err = PTR_ERR(upper);
+		if (IS_ERR_OR_NULL(upper))
+			goto out_err;
+
+		dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+		dput(upper);
+		goto out;
+	}
+
 	/* Then lookup origin by fh */
 	err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
 	if (err) {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 741a42d974a3..6199bf7a77c7 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -439,7 +439,7 @@ fail:
 }
 
 /* Get upper dentry from index */
-static struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 {
 	struct ovl_fh *fh;
 	struct dentry *upper;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index a47f9142b6be..a5d415aec131 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -270,6 +270,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
 			struct dentry *upperdentry, struct ovl_path **stackp);
 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 		      struct dentry *real, bool is_upper, bool set);
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
-- 
cgit 


From 988925164f659bf74061d3036e14873753c937d2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 17 Jan 2018 22:32:44 +0200
Subject: ovl: decode pure lower dir file handles

Similar to decoding a pure upper dir file handle, decoding a pure lower
dir file handle is implemented by looking up an overlay dentry of the same
path as the pure lower path and verifying that the overlay dentry's
real lower matches the decoded real lower file handle.

Unlike the case of upper dir file handle, the lookup of overlay path by
lower real path can fail or find a mismatched overlay dentry if any of
the lower parents have been copied up and renamed. To address this case
we will need to check if any of the lower parents are indexed.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 7a4b6a0fd527..361174810ce8 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -215,6 +215,23 @@ nomem:
 	return ERR_PTR(-ENOMEM);
 }
 
+/* Get the upper or lower dentry in stack which is on layer @idx */
+static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	int i;
+
+	if (!idx)
+		return ovl_dentry_upper(dentry);
+
+	for (i = 0; i < oe->numlower; i++) {
+		if (oe->lowerstack[i].layer->idx == idx)
+			return oe->lowerstack[i].dentry;
+	}
+
+	return NULL;
+}
+
 /*
  * Lookup a child overlay dentry to get a connected overlay dentry whose real
  * dentry is @real. If @real is on upper layer, we lookup a child overlay
@@ -230,10 +247,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
 	struct name_snapshot name;
 	int err;
 
-	/* TODO: lookup by lower real dentry */
-	if (layer->idx)
-		return ERR_PTR(-EACCES);
-
 	/*
 	 * Lookup child overlay dentry by real name. The dir mutex protects us
 	 * from racing with overlay rename. If the overlay dentry that is above
@@ -244,7 +257,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
 	inode_lock_nested(dir, I_MUTEX_PARENT);
 	err = -ECHILD;
 	parent = dget_parent(real);
-	if (ovl_dentry_upper(connected) != parent)
+	if (ovl_dentry_real_at(connected, layer->idx) != parent)
 		goto fail;
 
 	/*
@@ -262,7 +275,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
 		dput(this);
 		err = -ENOENT;
 		goto fail;
-	} else if (ovl_dentry_upper(this) != real) {
+	} else if (ovl_dentry_real_at(this, layer->idx) != real) {
 		dput(this);
 		err = -ESTALE;
 		goto fail;
@@ -294,14 +307,13 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
 	int err = 0;
 
 	/* TODO: use index when looking up by lower real dentry */
-	if (layer->idx)
-		return ERR_PTR(-EACCES);
 
 	connected = dget(sb->s_root);
 	while (!err) {
 		struct dentry *next, *this;
 		struct dentry *parent = NULL;
-		struct dentry *real_connected = ovl_dentry_upper(connected);
+		struct dentry *real_connected = ovl_dentry_real_at(connected,
+								   layer->idx);
 
 		if (real_connected == real)
 			break;
@@ -391,6 +403,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+	struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
 	struct dentry *real = upper ?: (index ?: lowerpath->dentry);
 
 	/*
@@ -400,19 +413,15 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
 	if (!d_is_dir(real))
 		return ovl_obtain_alias(sb, upper, lowerpath, index);
 
-	/* TODO: lookup connected dir from real lower dir */
-	if (!upper)
-		return ERR_PTR(-EACCES);
-
 	/* Removed empty directory? */
-	if ((upper->d_flags & DCACHE_DISCONNECTED) || d_unhashed(upper))
+	if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
 		return ERR_PTR(-ENOENT);
 
 	/*
-	 * If real upper dentry is connected and hashed, get a connected
-	 * overlay dentry with the same path as the real upper dentry.
+	 * If real dentry is connected and hashed, get a connected overlay
+	 * dentry whose real dentry is @real.
 	 */
-	return ovl_lookup_real(sb, upper, &upper_layer);
+	return ovl_lookup_real(sb, real, layer);
 }
 
 static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
-- 
cgit 


From 7a9dadef9684aaf738e7ce7e2a9284cc5e165ebc Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 10 Jul 2017 15:55:55 +0300
Subject: ovl: hash non-indexed dir by upper inode for NFS export

Non-indexed upper dirs are encoded as upper file handles. When NFS export
is enabled, hash non-indexed directory inodes by upper inode, so we can
find them in inode cache using the decoded upper inode.

When NFS export is disabled, directories are not indexed on copy up, so
hash non-indexed directory inodes by origin inode, the same hash key
that is used before copy up.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 56ba015b9f5e..416dc06835db 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -665,6 +665,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower)
 {
+	struct ovl_fs *ofs = sb->s_fs_info;
 	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
 	/* Already indexed or could be indexed on copy up? */
@@ -684,9 +685,10 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 	 * Hash non-dir that is or could be indexed by origin inode.
 	 * Hash dir that is or could be merged by origin inode.
 	 * Hash pure upper and non-indexed non-dir by upper inode.
+	 * Hash non-indexed dir by upper inode for NFS export.
 	 */
 	is_dir = S_ISDIR(realinode->i_mode);
-	if (is_dir)
+	if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
 		origin = lowerdentry;
 
 	if (upperdentry || origin) {
-- 
cgit 


From 4b91c30a5a19332e8dd10b601d05b72caf657730 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Jan 2018 16:39:13 +0200
Subject: ovl: lookup connected ancestor of dir in inode cache

Decoding a dir file handle requires walking backward up to layer root and
for lower dir also checking the index to see if any of the parents have
been copied up.

Lookup overlay ancestor dentry in inode/dentry cache by decoded real
parents to shortcut looking up all the way back to layer root.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 99 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/overlayfs/inode.c     | 21 +++++++---
 fs/overlayfs/overlayfs.h |  3 +-
 3 files changed, 110 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 361174810ce8..092e6e8c9258 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -294,6 +294,88 @@ fail:
 	goto out;
 }
 
+/*
+ * Lookup an indexed or hashed overlay dentry by real inode.
+ */
+static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
+					    struct dentry *real,
+					    struct ovl_layer *layer)
+{
+	struct dentry *this = NULL;
+	struct inode *inode;
+
+	inode = ovl_lookup_inode(sb, real, !layer->idx);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (inode) {
+		this = d_find_any_alias(inode);
+		iput(inode);
+	}
+
+	/* TODO: use index when looking up by origin inode */
+	if (!this)
+		return NULL;
+
+	if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+		dput(this);
+		this = ERR_PTR(-EIO);
+	}
+
+	return this;
+}
+
+/*
+ * Lookup an indexed or hashed overlay dentry, whose real dentry is an
+ * ancestor of @real.
+ */
+static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
+					       struct dentry *real,
+					       struct ovl_layer *layer)
+{
+	struct dentry *next, *parent = NULL;
+	struct dentry *ancestor = ERR_PTR(-EIO);
+
+	if (real == layer->mnt->mnt_root)
+		return dget(sb->s_root);
+
+	/* Find the topmost indexed or hashed ancestor */
+	next = dget(real);
+	for (;;) {
+		parent = dget_parent(next);
+
+		/*
+		 * Lookup a matching overlay dentry in inode/dentry
+		 * cache or in index by real inode.
+		 */
+		ancestor = ovl_lookup_real_inode(sb, next, layer);
+		if (ancestor)
+			break;
+
+		if (parent == layer->mnt->mnt_root) {
+			ancestor = dget(sb->s_root);
+			break;
+		}
+
+		/*
+		 * If @real has been moved out of the layer root directory,
+		 * we will eventually hit the real fs root. This cannot happen
+		 * by legit overlay rename, so we return error in that case.
+		 */
+		if (parent == next) {
+			ancestor = ERR_PTR(-EXDEV);
+			break;
+		}
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	return ancestor;
+}
+
 /*
  * Lookup a connected overlay dentry whose real dentry is @real.
  * If @real is on upper layer, we lookup a child overlay dentry with the same
@@ -306,9 +388,10 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
 	struct dentry *connected;
 	int err = 0;
 
-	/* TODO: use index when looking up by lower real dentry */
+	connected = ovl_lookup_real_ancestor(sb, real, layer);
+	if (IS_ERR(connected))
+		return connected;
 
-	connected = dget(sb->s_root);
 	while (!err) {
 		struct dentry *next, *this;
 		struct dentry *parent = NULL;
@@ -365,11 +448,15 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
 			 * overlay rename of child away from 'connected' parent.
 			 * In this case, we need to restart the lookup from the
 			 * top, because we cannot trust that 'real_connected' is
-			 * still an ancestor of 'real'.
+			 * still an ancestor of 'real'. There is a good chance
+			 * that the renamed overlay ancestor is now in cache, so
+			 * ovl_lookup_real_ancestor() will find it and we can
+			 * continue to connect exactly from where lookup failed.
 			 */
 			if (err == -ECHILD) {
-				this = dget(sb->s_root);
-				err = 0;
+				this = ovl_lookup_real_ancestor(sb, real,
+								layer);
+				err = IS_ERR(this) ? PTR_ERR(this) : 0;
 			}
 			if (!err) {
 				dput(connected);
@@ -494,7 +581,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 	} else if (is_deleted) {
 		/* Lookup deleted non-dir by origin inode */
 		if (!d_is_dir(origin.dentry))
-			inode = ovl_lookup_inode(sb, origin.dentry);
+			inode = ovl_lookup_inode(sb, origin.dentry, false);
 		err = -ESTALE;
 		if (!inode || atomic_read(&inode->i_count) == 1)
 			goto out_err;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 416dc06835db..fcd97b783fa1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -614,9 +614,15 @@ static int ovl_inode_set(struct inode *inode, void *data)
 }
 
 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
-			     struct dentry *upperdentry)
+			     struct dentry *upperdentry, bool strict)
 {
-	if (S_ISDIR(inode->i_mode)) {
+	/*
+	 * For directories, @strict verify from lookup path performs consistency
+	 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
+	 * inode. Non @strict verify from NFS handle decode path passes NULL for
+	 * 'unknown' lower/upper.
+	 */
+	if (S_ISDIR(inode->i_mode) && strict) {
 		/* Real lower dir moved to upper layer under us? */
 		if (!lowerdentry && ovl_inode_lower(inode))
 			return false;
@@ -645,15 +651,17 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
 	return true;
 }
 
-struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin)
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper)
 {
-	struct inode *inode, *key = d_inode(origin);
+	struct inode *inode, *key = d_inode(real);
 
 	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
 	if (!inode)
 		return NULL;
 
-	if (!ovl_verify_inode(inode, origin, NULL)) {
+	if (!ovl_verify_inode(inode, is_upper ? NULL : real,
+			      is_upper ? real : NULL, false)) {
 		iput(inode);
 		return ERR_PTR(-ESTALE);
 	}
@@ -704,7 +712,8 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			 * Verify that the underlying files stored in the inode
 			 * match those in the dentry.
 			 */
-			if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+			if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
+					      true)) {
 				iput(inode);
 				inode = ERR_PTR(-ESTALE);
 				goto out;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index a5d415aec131..bf17bf97c50f 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -323,7 +323,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+			       bool is_upper);
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower);
-- 
cgit 


From 061701540349c30d72e48a201449a840c77ad509 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 17 Jan 2018 14:40:27 +0200
Subject: ovl: lookup indexed ancestor of lower dir

ovl_lookup_real() in lower layer walks back lower parents to find the
topmost indexed parent. If an indexed ancestor is found before reaching
lower layer root, ovl_lookup_real() is called recursively with upper
layer to walk back from indexed upper to the topmost connected/hashed
upper parent (or up to root).

ovl_lookup_real() in upper layer then walks forward to connect the topmost
upper overlay dir dentry and ovl_lookup_real() in lower layer continues to
walk forward to connect the decoded lower overlay dir dentry.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c    | 41 ++++++++++++++++++++++++++++++++++++++++-
 fs/overlayfs/namei.c     | 20 ++++++++++++++------
 fs/overlayfs/overlayfs.h |  2 ++
 3 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 092e6e8c9258..b65ea49de457 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -294,6 +294,10 @@ fail:
 	goto out;
 }
 
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+				      struct dentry *real,
+				      struct ovl_layer *layer);
+
 /*
  * Lookup an indexed or hashed overlay dentry by real inode.
  */
@@ -301,9 +305,16 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
 					    struct dentry *real,
 					    struct ovl_layer *layer)
 {
+	struct ovl_fs *ofs = sb->s_fs_info;
+	struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+	struct dentry *index = NULL;
 	struct dentry *this = NULL;
 	struct inode *inode;
 
+	/*
+	 * Decoding upper dir from index is expensive, so first try to lookup
+	 * overlay dentry in inode/dcache.
+	 */
 	inode = ovl_lookup_inode(sb, real, !layer->idx);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
@@ -312,7 +323,35 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
 		iput(inode);
 	}
 
-	/* TODO: use index when looking up by origin inode */
+	/*
+	 * For decoded lower dir file handle, lookup index by origin to check
+	 * if lower dir was copied up and/or removed.
+	 */
+	if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
+		index = ovl_lookup_index(ofs, NULL, real, false);
+		if (IS_ERR(index))
+			return index;
+	}
+
+	/* Get connected upper overlay dir from index */
+	if (index) {
+		struct dentry *upper = ovl_index_upper(ofs, index);
+
+		dput(index);
+		if (IS_ERR_OR_NULL(upper))
+			return upper;
+
+		/*
+		 * ovl_lookup_real() in lower layer may call recursively once to
+		 * ovl_lookup_real() in upper layer. The first level call walks
+		 * back lower parents to the topmost indexed parent. The second
+		 * recursive call walks back from indexed upper to the topmost
+		 * connected/hashed upper parent (or up to root).
+		 */
+		this = ovl_lookup_real(sb, upper, &upper_layer);
+		dput(upper);
+	}
+
 	if (!this)
 		return NULL;
 
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 6199bf7a77c7..c5449efd96d5 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -661,11 +661,9 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
 	return ERR_PTR(err);
 }
 
-static struct dentry *ovl_lookup_index(struct dentry *dentry,
-				       struct dentry *upper,
-				       struct dentry *origin)
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify)
 {
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct dentry *index;
 	struct inode *inode;
 	struct qstr name;
@@ -693,6 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 	inode = d_inode(index);
 	if (d_is_negative(index)) {
 		goto out_dput;
+	} else if (ovl_is_whiteout(index) && !verify) {
+		/*
+		 * When index lookup is called with !verify for decoding an
+		 * overlay file handle, a whiteout index implies that decode
+		 * should treat file handle as stale and no need to print a
+		 * warning about it.
+		 */
+		dput(index);
+		index = ERR_PTR(-ESTALE);
+		goto out;
 	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
 		   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
 		/*
@@ -706,7 +714,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
 				    index, d_inode(index)->i_mode & S_IFMT,
 				    d_inode(origin)->i_mode & S_IFMT);
 		goto fail;
-	} else if (is_dir) {
+	} else if (is_dir && verify) {
 		if (!upper) {
 			pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
 					    origin, index);
@@ -943,7 +951,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	if (origin && ovl_indexdir(dentry->d_sb) &&
 	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
-		index = ovl_lookup_index(dentry, upperdentry, origin);
+		index = ovl_lookup_index(ofs, upperdentry, origin, true);
 		if (IS_ERR(index)) {
 			err = PTR_ERR(index);
 			index = NULL;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index bf17bf97c50f..0df25a9c94bd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -274,6 +274,8 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
 int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+				struct dentry *origin, bool verify);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags);
-- 
cgit 


From 8383f1748829e1a6a07988863ed13a47fb653387 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 2 Oct 2017 11:31:42 +0300
Subject: ovl: wire up NFS export operations

Now that NFS export operations are implemented, enable overlayfs NFS
export support if the "nfs_export" feature is enabled.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fccdcfae68e9..9ee37c76091d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1331,6 +1331,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	}
 
+	if (ofs->config.nfs_export)
+		sb->s_export_op = &ovl_export_operations;
+
 	/* Never override disk quota limits or use reserved space */
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
 
-- 
cgit 


From 131fa58d391fc0939f6c66b23776ad5df5db20f9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 29 Jan 2018 18:49:35 -0800
Subject: xfs: fix u32 type usage in sb validation function

Don't use u32, use uint32_t, because this won't work in xfsprogs.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 46af6aa60a8e..a55f7a45fa78 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -118,8 +118,8 @@ xfs_mount_validate_sb(
 	bool		check_inprogress,
 	bool		check_version)
 {
-	u32		agcount = 0;
-	u32		rem;
+	uint32_t	agcount = 0;
+	uint32_t	rem;
 
 	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
 		xfs_warn(mp, "bad magic number");
-- 
cgit 


From c472c07bfed9c87d7e0b2c052d7e77fedd7109a9 Mon Sep 17 00:00:00 2001
From: Goffredo Baroncelli <kreijack@inwind.it>
Date: Thu, 1 Feb 2018 08:15:25 -0500
Subject: iversion: Rename inode_cmp_iversion{+raw} to
 inode_eq_iversion{+raw}

The function inode_cmp_iversion{+raw} is counter-intuitive, because it
returns true when the counters are different and false when these are equal.

Rename it to inode_eq_iversion{+raw}, which returns true when
the counters are equal and false otherwise.

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/affs/dir.c       | 2 +-
 fs/exofs/dir.c      | 2 +-
 fs/ext2/dir.c       | 2 +-
 fs/ext4/dir.c       | 4 ++--
 fs/ext4/inline.c    | 2 +-
 fs/fat/namei_vfat.c | 2 +-
 fs/nfs/inode.c      | 6 +++---
 fs/ocfs2/dir.c      | 4 ++--
 fs/ufs/dir.c        | 2 +-
 9 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index d180b46453cf..b2bf7016e1b3 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	 * we can jump directly to where we left off.
 	 */
 	ino = (u32)(long)file->private_data;
-	if (ino && inode_cmp_iversion(inode, file->f_version) == 0) {
+	if (ino && inode_eq_iversion(inode, file->f_version)) {
 		pr_debug("readdir() left off=%d\n", ino);
 		goto inside;
 	}
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index c5a53fcc43ea..f0138674c1ed 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -242,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 
 	if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
 		return 0;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 4111085a129f..3b8114def693 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -294,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
 	unsigned char *types = NULL;
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 
 	if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
 		return 0;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index afda0a0499ce..da87cf757f7d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -209,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, file->f_version)) {
+		if (!inode_eq_iversion(inode, file->f_version)) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
 				de = (struct ext4_dir_entry_2 *)
 					(bh->b_data + i);
@@ -569,7 +569,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 		 * cached entries.
 		 */
 		if ((!info->curr_node) ||
-		    inode_cmp_iversion(inode, file->f_version)) {
+		    !inode_eq_iversion(inode, file->f_version)) {
 			info->curr_node = NULL;
 			free_rb_tree_fname(&info->root);
 			file->f_version = inode_query_iversion(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index a8b987b71173..adfc1f360dae 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1495,7 +1495,7 @@ int ext4_read_inline_dir(struct file *file,
 	 * dirent right now.  Scan from the start of the inline
 	 * dir to make sure.
 	 */
-	if (inode_cmp_iversion(inode, file->f_version)) {
+	if (!inode_eq_iversion(inode, file->f_version)) {
 		for (i = 0; i < extra_size && i < offset;) {
 			/*
 			 * "." is with offset 0 and
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index cefea792cde8..2649759c478a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -46,7 +46,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 {
 	int ret = 1;
 	spin_lock(&dentry->d_lock);
-	if (inode_cmp_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry)))
+	if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry)))
 		ret = 0;
 	spin_unlock(&dentry->d_lock);
 	return ret;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ceeaf0fb6657..7d893543cf3b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1314,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
 
 	if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
 			&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
-			&& !inode_cmp_iversion_raw(inode, fattr->pre_change_attr)) {
+			&& inode_eq_iversion_raw(inode, fattr->pre_change_attr)) {
 		inode_set_iversion_raw(inode, fattr->change_attr);
 		if (S_ISDIR(inode->i_mode))
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
@@ -1373,7 +1373,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */
-		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode_cmp_iversion_raw(inode, fattr->change_attr))
+		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
 			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
 
 		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
@@ -1803,7 +1803,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	/* More cache consistency checks */
 	if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
-		if (inode_cmp_iversion_raw(inode, fattr->change_attr)) {
+		if (!inode_eq_iversion_raw(inode, fattr->change_attr)) {
 			dprintk("NFS: change_attr change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
 			/* Could it be a race with writeback? */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b7520e20a770..977763d4c27d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1776,7 +1776,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, *f_version)) {
+		if (!inode_eq_iversion(inode, *f_version)) {
 			for (i = 0; i < i_size_read(inode) && i < offset; ) {
 				de = (struct ocfs2_dir_entry *)
 					(data->id_data + i);
@@ -1870,7 +1870,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 		 * readdir(2), then we might be pointing to an invalid
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
-		if (inode_cmp_iversion(inode, *f_version)) {
+		if (!inode_eq_iversion(inode, *f_version)) {
 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
 				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
 				/* It's too expensive to do a full
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 50dfce000864..b721d0bda5e5 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -429,7 +429,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
 	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
-	bool need_revalidate = inode_cmp_iversion(inode, file->f_version);
+	bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
 	unsigned flags = UFS_SB(sb)->s_flags;
 
 	UFSD("BEGIN\n");
-- 
cgit 


From 494370ccaae891de0a99b3c23b2df482c95cab8c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Wed, 31 Jan 2018 11:31:10 -0800
Subject: xfs: add scrub to XFS_BUILD_OPTIONS

Advertise this config option along with the others.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_super.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index fcc5dfc70aa0..8cee8e8050e3 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -44,6 +44,12 @@ extern void xfs_qm_exit(void);
 # define XFS_REALTIME_STRING
 #endif
 
+#ifdef CONFIG_XFS_ONLINE_SCRUB
+# define XFS_SCRUB_STRING	"scrub, "
+#else
+# define XFS_SCRUB_STRING
+#endif
+
 #ifdef DEBUG
 # define XFS_DBG_STRING		"debug"
 #else
@@ -54,6 +60,7 @@ extern void xfs_qm_exit(void);
 #define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
 				XFS_SECURITY_STRING \
 				XFS_REALTIME_STRING \
+				XFS_SCRUB_STRING \
 				XFS_DBG_STRING /* DBG must be last */
 
 struct xfs_inode;
-- 
cgit 


From b6e03c10bf3ff08c7678a946a2208b60e66f4426 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 31 Jan 2018 14:21:56 -0800
Subject: xfs: don't allow DAX on reflink filesystems

Now that reflink is no longer experimental, reject attempts to mount
with DAX until that whole mess gets sorted out.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f3e0001f9992..0505605e25c1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1666,9 +1666,12 @@ xfs_fs_fill_super(
 			"DAX unsupported by block device. Turning off DAX.");
 			mp->m_flags &= ~XFS_MOUNT_DAX;
 		}
-		if (xfs_sb_version_hasreflink(&mp->m_sb))
+		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 			xfs_alert(mp,
 		"DAX and reflink cannot be used together!");
+			error = -EINVAL;
+			goto out_filestream_unmount;
+		}
 	}
 
 	if (mp->m_flags & XFS_MOUNT_DISCARD) {
-- 
cgit 


From c14632ddac98dca7ab1740461fae330d09909560 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 31 Jan 2018 16:38:18 -0800
Subject: xfs: don't allow reflink + realtime filesystems

We don't support realtime filesystems with reflink either, so fail
those mounts.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
---
 fs/xfs/xfs_super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0505605e25c1..96388fb9adff 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1684,6 +1684,13 @@ xfs_fs_fill_super(
 		}
 	}
 
+	if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) {
+		xfs_alert(mp,
+	"reflink not compatible with realtime device!");
+		error = -EINVAL;
+		goto out_filestream_unmount;
+	}
+
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
 		if (mp->m_sb.sb_rblocks) {
 			xfs_alert(mp,
-- 
cgit 


From 76883f7988e6d06a97232e979bc7aaa7846a134b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 31 Jan 2018 09:47:25 -0800
Subject: xfs: remove experimental tag for reverse mapping

Reverse mapping has had a while to soak, so remove the experimental tag.
Now that we've landed space metadata cross-referencing in scrub, the
feature actually has a purpose.

Reject rmap filesystems with an rt device until the code to support it
is actually implemented.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
---
 fs/xfs/xfs_super.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 96388fb9adff..7aba628dc527 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1691,15 +1691,11 @@ xfs_fs_fill_super(
 		goto out_filestream_unmount;
 	}
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-		if (mp->m_sb.sb_rblocks) {
-			xfs_alert(mp,
-	"EXPERIMENTAL reverse mapping btree not compatible with realtime device!");
-			error = -EINVAL;
-			goto out_filestream_unmount;
-		}
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
 		xfs_alert(mp,
-	"EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+	"reverse mapping btree not compatible with realtime device!");
+		error = -EINVAL;
+		goto out_filestream_unmount;
 	}
 
 	error = xfs_mountfs(mp);
-- 
cgit 


From 2aed489d163a6559e07dbc238882c9970ae0f65b Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 28 Jan 2018 02:35:48 +0200
Subject: ovl: fix regression in fsnotify of overlay merge dir

A re-factoring patch in NFS export series has passed the wrong argument
to ovl_get_inode() causing a regression in the very recent fix to
fsnotify of overlay merge dir.

The regression has caused merge directory inodes to be hashed by upper
instead of lower real inode, when NFS export and directory indexing is
disabled. That caused an inotify watch to become obsolete after directory
copy up and drop caches.

LTP test inotify07 was improved to catch this regression.
The regression also caused multiple redirect dirs to same origin not to
be detected on lookup with NFS export disabled. An xfstest was added to
cover this case.

Fixes: 0aceb53e73be ("ovl: do not pass overlay dentry to ovl_get_inode()")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c5449efd96d5..de3e6da1d5a5 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -976,6 +976,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		upperdentry = dget(index);
 
 	if (upperdentry || ctr) {
+		if (ctr)
+			origin = stack[0].dentry;
 		inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
 				      ctr);
 		err = PTR_ERR(inode);
-- 
cgit 


From 9b6faee074702bbbc207e7027b9416c2d8fea9fe Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 30 Jan 2018 13:54:45 +0200
Subject: ovl: check ERR_PTR() return value from ovl_encode_fh()

Another fix for an issue reported by 0-day robot.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 8ed5eec9d6c4 ("ovl: encode pure upper file handles")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/export.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index b65ea49de457..bb94ce9da5c8 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -108,6 +108,9 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 
 	/* Encode an upper or origin file handle */
 	fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh))
+		goto fail;
 
 	err = -EOVERFLOW;
 	if (fh->len > buflen)
-- 
cgit 


From e3912ac37e07a13c70675cd75020694de4841c74 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:36:51 -0800
Subject: proc: use %u for pid printing and slightly less stack

PROC_NUMBUF is 13 which is enough for "negative int + \n + \0".

However PIDs and TGIDs are never negative and newline is not a concern,
so use just 10 per integer.

Link: http://lkml.kernel.org/r/20171120203005.GA27743@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alexander Viro <viro@ftp.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c        | 16 ++++++++--------
 fs/proc/fd.c          |  2 +-
 fs/proc/self.c        |  6 +++---
 fs/proc/thread_self.c |  5 ++---
 4 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b52d659..fe56f3c7002a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3018,11 +3018,11 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 {
 	struct dentry *dentry, *leader, *dir;
-	char buf[PROC_NUMBUF];
+	char buf[10 + 1];
 	struct qstr name;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	name.len = snprintf(buf, sizeof(buf), "%u", pid);
 	/* no ->d_hash() rejects on procfs */
 	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
@@ -3034,7 +3034,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		return;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+	name.len = snprintf(buf, sizeof(buf), "%u", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (!leader)
 		goto out;
@@ -3046,7 +3046,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		goto out_put_leader;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	name.len = snprintf(buf, sizeof(buf), "%u", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
 		d_invalidate(dentry);
@@ -3225,14 +3225,14 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 
 		cond_resched();
 		if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
 			continue;
 
-		len = snprintf(name, sizeof(name), "%d", iter.tgid);
+		len = snprintf(name, sizeof(name), "%u", iter.tgid);
 		ctx->pos = iter.tgid + TGID_OFFSET;
 		if (!proc_fill_cache(file, ctx, name, len,
 				     proc_pid_instantiate, iter.task, NULL)) {
@@ -3560,10 +3560,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
 	     task;
 	     task = next_tid(task), ctx->pos++) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 		tid = task_pid_nr_ns(task, ns);
-		len = snprintf(name, sizeof(name), "%d", tid);
+		len = snprintf(name, sizeof(name), "%u", tid);
 		if (!proc_fill_cache(file, ctx, name, len,
 				proc_task_instantiate, task, NULL)) {
 			/* returning this tgid failed, save it as the first
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 96fc70225e54..6b80cd1e419a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -236,7 +236,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 	for (fd = ctx->pos - 2;
 	     fd < files_fdtable(files)->max_fds;
 	     fd++, ctx->pos++) {
-		char name[PROC_NUMBUF];
+		char name[10 + 1];
 		int len;
 
 		if (!fcheck_files(files, fd))
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 31326bb23b8b..d30627aa440b 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -17,11 +17,11 @@ static const char *proc_self_get_link(struct dentry *dentry,
 
 	if (!tgid)
 		return ERR_PTR(-ENOENT);
-	/* 11 for max length of signed int in decimal + NULL term */
-	name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+	/* max length of unsigned int in decimal + NULL term */
+	name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
 	if (unlikely(!name))
 		return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
-	sprintf(name, "%d", tgid);
+	sprintf(name, "%u", tgid);
 	set_delayed_call(done, kfree_link, name);
 	return name;
 }
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index b813e3b529f2..6c1a54716337 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -18,11 +18,10 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
 
 	if (!pid)
 		return ERR_PTR(-ENOENT);
-	name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
-				dentry ? GFP_KERNEL : GFP_ATOMIC);
+	name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
 	if (unlikely(!name))
 		return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
-	sprintf(name, "%d/task/%d", tgid, pid);
+	sprintf(name, "%u/task/%u", tgid, pid);
 	set_delayed_call(done, kfree_link, name);
 	return name;
 }
-- 
cgit 


From 9f7118b2007d5e7c7a061550d2ca2ecb841537dc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:36:55 -0800
Subject: proc: don't use READ_ONCE/WRITE_ONCE for /proc/*/fail-nth

READ_ONCE and WRITE_ONCE are useless when only one read/write is being
made.

Link: http://lkml.kernel.org/r/20171120204033.GA9446@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index fe56f3c7002a..373091249bdb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1370,7 +1370,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
-	WRITE_ONCE(task->fail_nth, n);
+	task->fail_nth = n;
 	put_task_struct(task);
 
 	return count;
@@ -1386,8 +1386,7 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
-	len = snprintf(numbuf, sizeof(numbuf), "%u\n",
-			READ_ONCE(task->fail_nth));
+	len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
 	len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
 	put_task_struct(task);
 
-- 
cgit 


From ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:36:59 -0800
Subject: proc: fix /proc/*/map_files lookup

Current code does:

	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)

However sscanf() is broken garbage.

It silently accepts whitespace between format specifiers
(did you know that?).

It silently accepts valid strings which result in integer overflow.

Do not use sscanf() for any even remotely reliable parsing code.

	OK
	# readlink '/proc/1/map_files/55a23af39000-55a23b05b000'
	/lib/systemd/systemd

	broken
	# readlink '/proc/1/map_files/               55a23af39000-55a23b05b000'
	/lib/systemd/systemd

	broken
	# readlink '/proc/1/map_files/55a23af39000-55a23b05b000    '
	/lib/systemd/systemd

	very broken
	# readlink '/proc/1/map_files/1000000000000000055a23af39000-55a23b05b000'
	/lib/systemd/systemd

Andrei said:

: This patch breaks criu.  It was a bug in criu.  And this bug is on a minor
: path, which works when memfd_create() isn't available.  It is a reason why
: I ask to not backport this patch to stable kernels.
:
: In CRIU this bug can be triggered, only if this patch will be backported
: to a kernel which version is lower than v3.16.

Link: http://lkml.kernel.org/r/20171120212706.GA14325@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 373091249bdb..4c12cb2cd704 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -100,6 +100,8 @@
 #include "internal.h"
 #include "fd.h"
 
+#include "../../lib/kstrtox.h"
+
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
  *	certainly an error.  Permission checks need to happen during
@@ -1906,8 +1908,33 @@ end_instantiate:
 static int dname_to_vma_addr(struct dentry *dentry,
 			     unsigned long *start, unsigned long *end)
 {
-	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+	const char *str = dentry->d_name.name;
+	unsigned long long sval, eval;
+	unsigned int len;
+
+	len = _parse_integer(str, 16, &sval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (sval != (unsigned long)sval)
+		return -EINVAL;
+	str += len;
+
+	if (*str != '-')
 		return -EINVAL;
+	str++;
+
+	len = _parse_integer(str, 16, &eval);
+	if (len & KSTRTOX_OVERFLOW)
+		return -EINVAL;
+	if (eval != (unsigned long)eval)
+		return -EINVAL;
+	str += len;
+
+	if (*str != '\0')
+		return -EINVAL;
+
+	*start = sval;
+	*end = eval;
 
 	return 0;
 }
-- 
cgit 


From 593bc695a1102a540f1613c651e73693b17a7343 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:02 -0800
Subject: fs/proc/vmcore.c: simpler /proc/vmcore cleanup

Iterators aren't necessary as you can just grab the first entry and delete
it until no entries are left.

Link: http://lkml.kernel.org/r/20171121191121.GA20757@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/vmcore.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 885d445afa0d..a45f0af22a60 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1178,18 +1178,16 @@ fs_initcall(vmcore_init);
 /* Cleanup function for vmcore module. */
 void vmcore_cleanup(void)
 {
-	struct list_head *pos, *next;
-
 	if (proc_vmcore) {
 		proc_remove(proc_vmcore);
 		proc_vmcore = NULL;
 	}
 
 	/* clear the vmcore list. */
-	list_for_each_safe(pos, next, &vmcore_list) {
+	while (!list_empty(&vmcore_list)) {
 		struct vmcore *m;
 
-		m = list_entry(pos, struct vmcore, list);
+		m = list_first_entry(&vmcore_list, struct vmcore, list);
 		list_del(&m->list);
 		kfree(m);
 	}
-- 
cgit 


From 20d28cde5558a2a211620254ec7bc53a4334167f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:06 -0800
Subject: proc: less memory for /proc/*/map_files readdir

dentry name can be evaluated later, right before calling into VFS.

Also, spend less time under ->mmap_sem.

Link: http://lkml.kernel.org/r/20171110163034.GA2534@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4c12cb2cd704..a3efc2427c74 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2026,9 +2026,9 @@ out:
 }
 
 struct map_files_info {
+	unsigned long	start;
+	unsigned long	end;
 	fmode_t		mode;
-	unsigned int	len;
-	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
 };
 
 /*
@@ -2198,10 +2198,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 			if (++pos <= ctx->pos)
 				continue;
 
+			info.start = vma->vm_start;
+			info.end = vma->vm_end;
 			info.mode = vma->vm_file->f_mode;
-			info.len = snprintf(info.name,
-					sizeof(info.name), "%lx-%lx",
-					vma->vm_start, vma->vm_end);
 			if (flex_array_put(fa, i++, &info, GFP_KERNEL))
 				BUG();
 		}
@@ -2209,9 +2208,13 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	up_read(&mm->mmap_sem);
 
 	for (i = 0; i < nr_files; i++) {
+		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
+		unsigned int len;
+
 		p = flex_array_get(fa, i);
+		len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
 		if (!proc_fill_cache(file, ctx,
-				      p->name, p->len,
+				      buf, len,
 				      proc_map_files_instantiate,
 				      task,
 				      (void *)(unsigned long)p->mode))
-- 
cgit 


From 171ef917dfe721b1437b0066f7bc5684d776bba8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:10 -0800
Subject: fs/proc/array.c: delete children_seq_release()

It is 1:1 wrapper around seq_release().

Link: http://lkml.kernel.org/r/20171122171510.GA12161@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index d67a72dcb92c..598803576e4c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -736,16 +736,10 @@ static int children_seq_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-int children_seq_release(struct inode *inode, struct file *file)
-{
-	seq_release(inode, file);
-	return 0;
-}
-
 const struct file_operations proc_tid_children_operations = {
 	.open    = children_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = children_seq_release,
+	.release = seq_release,
 };
 #endif /* CONFIG_PROC_CHILDREN */
-- 
cgit 


From d0290bc20d4739b7a900ae37eb5d4cc3be2b393f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 6 Feb 2018 15:37:13 -0800
Subject: fs/proc/kcore.c: use probe_kernel_read() instead of memcpy()

Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext
data") added a bounce buffer to avoid hardened usercopy checks.  Copying
to the bounce buffer was implemented with a simple memcpy() assuming
that it is always valid to read from kernel memory iff the
kern_addr_valid() check passed.

A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null"
now can easily crash the kernel, since the former exception handling on
invalid kernel addresses now doesn't work anymore.

Also adding a kern_addr_valid() implementation wouldn't help here.  Most
architectures simply return 1 here, while a couple implemented a page
table walk to figure out if something is mapped at the address in
question.

With DEBUG_PAGEALLOC active mappings are established and removed all the
time, so that relying on the result of kern_addr_valid() before
executing the memcpy() also doesn't work.

Therefore simply use probe_kernel_read() to copy to the bounce buffer.
This also allows simplifying read_kcore().

At least on s390 this fixes the observed crashes and doesn't introduce
warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add
bounce buffer for ktext data"), even though the generic
probe_kernel_read() implementation uses uaccess functions.

While looking into this I'm also wondering if kern_addr_valid() could be
completely removed...(?)

Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com
Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data")
Fixes: f5509cc18daa ("mm: Hardened usercopy")
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 4bc85cb8be6a..e8a93bc8285d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				return -EFAULT;
 		} else {
 			if (kern_addr_valid(start)) {
-				unsigned long n;
-
 				/*
 				 * Using bounce buffer to bypass the
 				 * hardened user copy kernel text checks.
 				 */
-				memcpy(buf, (char *) start, tsz);
-				n = copy_to_user(buffer, buf, tsz);
-				/*
-				 * We cannot distinguish between fault on source
-				 * and fault on destination. When this happens
-				 * we clear too and hope it will trigger the
-				 * EFAULT again.
-				 */
-				if (n) { 
-					if (clear_user(buffer + tsz - n,
-								n))
+				if (probe_kernel_read(buf, (void *) start, tsz)) {
+					if (clear_user(buffer, tsz))
+						return -EFAULT;
+				} else {
+					if (copy_to_user(buffer, buf, tsz))
 						return -EFAULT;
 				}
 			} else {
-- 
cgit 


From 163cf548db888710695d5dbe907cda4262d45b52 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:18 -0800
Subject: fs/proc/internal.h: rearrange struct proc_dir_entry

struct proc_dir_entry became a bit messy over the years:

* move 16-bit ->mode before namelen to get rid of padding
* make ->in_use first field: it seems to be most used resulting in
  smaller code on x86_64 (defconfig):

	add/remove: 0/0 grow/shrink: 7/13 up/down: 24/-67 (-43)
	Function                                     old     new   delta
	proc_readdir_de                              451     455      +4
	proc_get_inode                               282     286      +4
	pde_put                                       65      69      +4
	remove_proc_subtree                          294     297      +3
	remove_proc_entry                            297     300      +3
	proc_register                                295     298      +3
	proc_notify_change                            94      97      +3
	unuse_pde                                     27      26      -1
	proc_reg_write                                89      85      -4
	proc_reg_unlocked_ioctl                       85      81      -4
	proc_reg_read                                 89      85      -4
	proc_reg_llseek                               87      83      -4
	proc_reg_get_unmapped_area                   123     119      -4
	proc_entry_rundown                           139     135      -4
	proc_reg_poll                                 91      85      -6
	proc_reg_mmap                                 79      73      -6
	proc_get_link                                 55      49      -6
	proc_reg_release                             108     101      -7
	proc_reg_open                                298     291      -7
	close_pdeo                                   228     218     -10

* move writeable fields together to a first cacheline (on x86_64),
  those include
	* ->in_use: reference count, taken every open/read/write/close etc
	* ->count: reference count, taken at readdir on every entry
	* ->pde_openers: tracks (nearly) every open, dirtied
	* ->pde_unload_lock: spinlock protecting ->pde_openers
	* ->proc_iops, ->proc_fops, ->data: writeonce fields,
	  used right together with previous group.

* other rarely written fields go into 1st/2nd and 2nd/3rd cacheline on
  32-bit and 64-bit respectively.

Additionally on 32-bit, ->subdir, ->subdir_node, ->namelen, ->name go
fully into 2nd cacheline, separated from writeable fields.  They are all
used during lookup.

Link: http://lkml.kernel.org/r/20171220215914.GA7877@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 4a67188c8d74..a290a1e921a5 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -31,24 +31,27 @@ struct mempolicy;
  * subdir_node is used to build the rb tree "subdir" of the parent.
  */
 struct proc_dir_entry {
+	/*
+	 * number of callers into module in progress;
+	 * negative -> it's going away RSN
+	 */
+	atomic_t in_use;
+	atomic_t count;		/* use count */
+	struct list_head pde_openers;	/* who did ->open, but not ->release */
+	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	struct completion *pde_unload_completion;
+	const struct inode_operations *proc_iops;
+	const struct file_operations *proc_fops;
+	void *data;
 	unsigned int low_ino;
-	umode_t mode;
 	nlink_t nlink;
 	kuid_t uid;
 	kgid_t gid;
 	loff_t size;
-	const struct inode_operations *proc_iops;
-	const struct file_operations *proc_fops;
 	struct proc_dir_entry *parent;
 	struct rb_root_cached subdir;
 	struct rb_node subdir_node;
-	void *data;
-	atomic_t count;		/* use count */
-	atomic_t in_use;	/* number of callers into module in progress; */
-			/* negative -> it's going away RSN */
-	struct completion *pde_unload_completion;
-	struct list_head pde_openers;	/* who did ->open, but not ->release */
-	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	umode_t mode;
 	u8 namelen;
 	char name[];
 } __randomize_layout;
-- 
cgit 


From 53f63345d893df36b58e81ddb3d11dcd2e9cc966 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:21 -0800
Subject: fs/proc/internal.h: fix up comment

Document what ->pde_unload_lock actually does.

Link: http://lkml.kernel.org/r/20180103185120.GB31849@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a290a1e921a5..5ba317874f0d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -38,7 +38,8 @@ struct proc_dir_entry {
 	atomic_t in_use;
 	atomic_t count;		/* use count */
 	struct list_head pde_openers;	/* who did ->open, but not ->release */
-	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	/* protects ->pde_openers and all struct pde_opener instances */
+	spinlock_t pde_unload_lock;
 	struct completion *pde_unload_completion;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
-- 
cgit 


From efb1a57d90cae6af1ddd32f1b920c924a711aba5 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:24 -0800
Subject: fs/proc: use __ro_after_init

/proc/self inode numbers, value of proc_inode_cache and st_nlink of
/proc/$TGID are fixed constants.

Link: http://lkml.kernel.org/r/20180103184707.GA31849@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c        | 5 +++--
 fs/proc/inode.c       | 3 ++-
 fs/proc/self.c        | 3 ++-
 fs/proc/thread_self.c | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3efc2427c74..9298324325ed 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -75,6 +75,7 @@
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/printk.h>
+#include <linux/cache.h>
 #include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
@@ -112,8 +113,8 @@
  *	in /proc for a task before it execs a suid executable.
  */
 
-static u8 nlink_tid;
-static u8 nlink_tgid;
+static u8 nlink_tid __ro_after_init;
+static u8 nlink_tgid __ro_after_init;
 
 struct pid_entry {
 	const char *name;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8dacaabb9f37..c5c8e7af5520 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -5,6 +5,7 @@
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 
+#include <linux/cache.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
@@ -52,7 +53,7 @@ static void proc_evict_inode(struct inode *inode)
 	}
 }
 
-static struct kmem_cache * proc_inode_cachep;
+static struct kmem_cache *proc_inode_cachep __ro_after_init;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
diff --git a/fs/proc/self.c b/fs/proc/self.c
index d30627aa440b..4d7d061696b3 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cache.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
@@ -30,7 +31,7 @@ static const struct inode_operations proc_self_inode_operations = {
 	.get_link	= proc_self_get_link,
 };
 
-static unsigned self_inum;
+static unsigned self_inum __ro_after_init;
 
 int proc_setup_self(struct super_block *s)
 {
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 6c1a54716337..9d2efaca499f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/cache.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/pid_namespace.h>
@@ -30,7 +31,7 @@ static const struct inode_operations proc_thread_self_inode_operations = {
 	.get_link	= proc_thread_self_get_link,
 };
 
-static unsigned thread_self_inum;
+static unsigned thread_self_inum __ro_after_init;
 
 int proc_setup_thread_self(struct super_block *s)
 {
-- 
cgit 


From 15b158b4e6274351fc3cf652cbabc57104efb547 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:28 -0800
Subject: proc: spread likely/unlikely a bit

use_pde() is used at every open/read/write/...  of every random /proc
file.  Negative refcount happens only if PDE is being deleted by module
(read: never).  So it gets "likely".

unuse_pde() gets "unlikely" for the same reason.

close_pdeo() gets unlikely as the completion is filled only if there is a
race between PDE removal and close() (read: never ever).

It even saves code on x86_64 defconfig:

	add/remove: 0/0 grow/shrink: 1/2 up/down: 2/-20 (-18)
	Function                                     old     new   delta
	close_pdeo                                   183     185      +2
	proc_reg_get_unmapped_area                   119     111      -8
	proc_reg_poll                                 85      73     -12

Link: http://lkml.kernel.org/r/20180104175657.GA5204@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index c5c8e7af5520..6e8724958116 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -129,12 +129,12 @@ enum {BIAS = -1U<<31};
 
 static inline int use_pde(struct proc_dir_entry *pde)
 {
-	return atomic_inc_unless_negative(&pde->in_use);
+	return likely(atomic_inc_unless_negative(&pde->in_use));
 }
 
 static void unuse_pde(struct proc_dir_entry *pde)
 {
-	if (atomic_dec_return(&pde->in_use) == BIAS)
+	if (unlikely(atomic_dec_return(&pde->in_use) == BIAS))
 		complete(pde->pde_unload_completion);
 }
 
@@ -167,7 +167,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		spin_lock(&pde->pde_unload_lock);
 		/* After ->release. */
 		list_del(&pdeo->lh);
-		if (pdeo->c)
+		if (unlikely(pdeo->c))
 			complete(pdeo->c);
 		kfree(pdeo);
 	}
@@ -421,7 +421,7 @@ static const char *proc_get_link(struct dentry *dentry,
 				 struct delayed_call *done)
 {
 	struct proc_dir_entry *pde = PDE(inode);
-	if (unlikely(!use_pde(pde)))
+	if (!use_pde(pde))
 		return ERR_PTR(-EINVAL);
 	set_delayed_call(done, proc_put_link, pde);
 	return pde->data;
-- 
cgit 


From 93ad5bc6d4addb74e30d421cd3ba5249c961fb3e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:37:31 -0800
Subject: proc: rearrange args

Rearrange args for smaller code.

lookup revolves around memcmp() which gets len 3rd arg, so propagate
length as 3rd arg.

readdir and lookup add additional arg to VFS ->readdir and ->lookup, so
better add it to the end.

Space savings on x86_64:

	add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-18 (-18)
	Function                                     old     new   delta
	proc_readdir                                  22      13      -9
	proc_lookup                                   18       9      -9

proc_match() is smaller if not inlined, I promise!

Link: http://lkml.kernel.org/r/20180104175958.GB5204@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c  | 18 +++++++++---------
 fs/proc/internal.h |  5 ++---
 fs/proc/proc_net.c |  4 ++--
 3 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 793a67574668..5d709fa8f3a2 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
 
 static DEFINE_RWLOCK(proc_subdir_lock);
 
-static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
+static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
 {
 	if (len < de->namelen)
 		return -1;
@@ -60,7 +60,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 		struct proc_dir_entry *de = rb_entry(node,
 						     struct proc_dir_entry,
 						     subdir_node);
-		int result = proc_match(len, name, de);
+		int result = proc_match(name, de, len);
 
 		if (result < 0)
 			node = node->rb_left;
@@ -84,7 +84,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 		struct proc_dir_entry *this = rb_entry(*new,
 						       struct proc_dir_entry,
 						       subdir_node);
-		int result = proc_match(de->namelen, de->name, this);
+		int result = proc_match(de->name, this, de->namelen);
 
 		parent = *new;
 		if (result < 0)
@@ -211,8 +211,8 @@ void proc_free_inum(unsigned int inum)
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
-struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
-		struct dentry *dentry)
+struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
+			      struct proc_dir_entry *de)
 {
 	struct inode *inode;
 
@@ -235,7 +235,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
 		unsigned int flags)
 {
-	return proc_lookup_de(PDE(dir), dir, dentry);
+	return proc_lookup_de(dir, dentry, PDE(dir));
 }
 
 /*
@@ -247,8 +247,8 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
  * value of the readdir() call, as long as it's non-negative
  * for success..
  */
-int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
-		    struct dir_context *ctx)
+int proc_readdir_de(struct file *file, struct dir_context *ctx,
+		    struct proc_dir_entry *de)
 {
 	int i;
 
@@ -292,7 +292,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
 
-	return proc_readdir_de(PDE(inode), file, ctx);
+	return proc_readdir_de(file, ctx, PDE(inode));
 }
 
 /*
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5ba317874f0d..d697c8ab0a14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -153,10 +153,9 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
  * generic.c
  */
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
-extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
-				     struct dentry *);
+struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
 extern int proc_readdir(struct file *, struct dir_context *);
-extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *);
+int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *);
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a2bf369c923d..68c06ae7888c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -135,7 +135,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
 	de = ERR_PTR(-ENOENT);
 	net = get_proc_task_net(dir);
 	if (net != NULL) {
-		de = proc_lookup_de(net->proc_net, dir, dentry);
+		de = proc_lookup_de(dir, dentry, net->proc_net);
 		put_net(net);
 	}
 	return de;
@@ -172,7 +172,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
 	ret = -EINVAL;
 	net = get_proc_task_net(file_inode(file));
 	if (net != NULL) {
-		ret = proc_readdir_de(net->proc_net, file, ctx);
+		ret = proc_readdir_de(file, ctx, net->proc_net);
 		put_net(net);
 	}
 	return ret;
-- 
cgit 


From 4bf8ba811ac1102d7de6f73af3b9f323463e16c0 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Tue, 6 Feb 2018 15:37:34 -0800
Subject: fs/proc/consoles.c: use seq_putc() in show_console_dev()

A single character (line break) should be put into a sequence.  Thus use
the corresponding function "seq_putc".

This issue was detected by using the Coccinelle software.

Link: http://lkml.kernel.org/r/04fb69fe-d820-9141-820f-07e9a48f4635@users.sourceforge.net
Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/consoles.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index 290ba85cb900..a8ac48aebd59 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -55,8 +55,7 @@ static int show_console_dev(struct seq_file *m, void *v)
 	if (dev)
 		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
 
-	seq_printf(m, "\n");
-
+	seq_putc(m, '\n');
 	return 0;
 }
 
-- 
cgit 


From 60c9d92f887f4606d363fece7a36c92664dc64c6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Feb 2018 15:39:13 -0800
Subject: elf: fix NT_FILE integer overflow

If vm.max_map_count is bumped above 2^26 (67+ mil) and system has enough RAM
to allocate all the VMAs (~12.8 GB on Fedora 27 with 200-byte VMAs), then
it should be possible to overflow 32-bit "size", pass paranoia check,
allocate very little vmalloc space and oops while writing into vmalloc
guard page...

But I didn't test this, only coredump of regular process.

Link: http://lkml.kernel.org/r/20180112203427.GA9109@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 83732fef510d..bdb201230bae 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1599,6 +1599,8 @@ static int fill_files_note(struct memelfnote *note)
 
 	/* *Estimated* file count and total data size needed */
 	count = current->mm->map_count;
+	if (count > UINT_MAX / 64)
+		return -EINVAL;
 	size = count * 64;
 
 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
-- 
cgit 


From fb04b91bc2c3a83e9e2ba9c5ce0f0124dd3ffef0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Feb 2018 15:39:21 -0800
Subject: nilfs2: use time64_t internally

The superblock and segment timestamps are used only internally in nilfs2
and can be read out using sysfs.

Since we are using the old 'get_seconds()' interface and store the data
as timestamps, the behavior differs slightly between 64-bit and 32-bit
kernels: the latter will show incorrect timestamps after 2038 in sysfs,
and presumably fail completely in 2106 as comparisons go wrong.

This changes nilfs2 to use time64_t with ktime_get_real_seconds() to
handle timestamps, making the behavior consistent and correct on both
32-bit and 64-bit machines.

The on-disk format already uses 64-bit timestamps, so nothing changes
there.

Link: http://lkml.kernel.org/r/20180122211050.1286441-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/segbuf.c    |  2 +-
 fs/nilfs2/segbuf.h    |  4 ++--
 fs/nilfs2/segment.c   |  2 +-
 fs/nilfs2/segment.h   |  2 +-
 fs/nilfs2/sufile.c    |  2 +-
 fs/nilfs2/sufile.h    |  2 +-
 fs/nilfs2/super.c     |  4 ++--
 fs/nilfs2/sysfs.c     | 21 ++++++++++-----------
 fs/nilfs2/the_nilfs.h |  8 ++++----
 9 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6c5009cc4e6f..68cb9e4740b4 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -130,7 +130,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
 }
 
 int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags,
-		       time_t ctime, __u64 cno)
+		       time64_t ctime, __u64 cno)
 {
 	int err;
 
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 7bbccc099709..10e16935fff6 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -46,7 +46,7 @@ struct nilfs_segsum_info {
 	unsigned long		nfileblk;
 	u64			seg_seq;
 	__u64			cno;
-	time_t			ctime;
+	time64_t		ctime;
 	sector_t		next;
 };
 
@@ -120,7 +120,7 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
 			   struct nilfs_segment_buffer *prev);
 void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
 				  struct the_nilfs *);
-int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time_t,
+int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t,
 		       __u64);
 int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
 int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9f3ffba41533..0953635e7d48 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2040,7 +2040,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 			goto out;
 
 		/* Update time stamp */
-		sci->sc_seg_ctime = get_seconds();
+		sci->sc_seg_ctime = ktime_get_real_seconds();
 
 		err = nilfs_segctor_collect(sci, nilfs, mode);
 		if (unlikely(err))
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 84084a4d9b3e..04634e3e3d58 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -157,7 +157,7 @@ struct nilfs_sc_info {
 	unsigned long		sc_blk_cnt;
 	unsigned long		sc_datablk_cnt;
 	unsigned long		sc_nblk_this_inc;
-	time_t			sc_seg_ctime;
+	time64_t		sc_seg_ctime;
 	__u64			sc_cno;
 	unsigned long		sc_flags;
 
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1341a41e7b43..c7fa139d50e8 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -526,7 +526,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
  * @modtime: modification time (option)
  */
 int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
-				   unsigned long nblocks, time_t modtime)
+				   unsigned long nblocks, time64_t modtime)
 {
 	struct buffer_head *bh;
 	struct nilfs_segment_usage *su;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 158a9190c8ec..673a891350f4 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -35,7 +35,7 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
 int nilfs_sufile_alloc(struct inode *, __u64 *);
 int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
 int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
-				   unsigned long nblocks, time_t modtime);
+				   unsigned long nblocks, time64_t modtime);
 int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
 ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int,
 				size_t);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 3073b646e1ba..6ffeca84d7c3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -283,10 +283,10 @@ int nilfs_commit_super(struct super_block *sb, int flag)
 {
 	struct the_nilfs *nilfs = sb->s_fs_info;
 	struct nilfs_super_block **sbp = nilfs->ns_sbp;
-	time_t t;
+	time64_t t;
 
 	/* nilfs->ns_sem must be locked by the caller. */
-	t = get_seconds();
+	t = ktime_get_real_seconds();
 	nilfs->ns_sbwtime = t;
 	sbp[0]->s_wtime = cpu_to_le64(t);
 	sbp[0]->s_sum = 0;
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 490303e3d517..4b25837e7724 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -31,7 +31,7 @@ static struct kset *nilfs_kset;
 #define NILFS_SHOW_TIME(time_t_val, buf) ({ \
 		struct tm res; \
 		int count = 0; \
-		time_to_tm(time_t_val, 0, &res); \
+		time64_to_tm(time_t_val, 0, &res); \
 		res.tm_year += 1900; \
 		res.tm_mon += 1; \
 		count = scnprintf(buf, PAGE_SIZE, \
@@ -579,7 +579,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
 					struct the_nilfs *nilfs,
 					char *buf)
 {
-	time_t ctime;
+	time64_t ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	ctime = nilfs->ns_ctime;
@@ -593,13 +593,13 @@ nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
 					    struct the_nilfs *nilfs,
 					    char *buf)
 {
-	time_t ctime;
+	time64_t ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	ctime = nilfs->ns_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", ctime);
 }
 
 static ssize_t
@@ -607,7 +607,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
 					 struct the_nilfs *nilfs,
 					 char *buf)
 {
-	time_t nongc_ctime;
+	time64_t nongc_ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	nongc_ctime = nilfs->ns_nongc_ctime;
@@ -621,14 +621,13 @@ nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
 						struct the_nilfs *nilfs,
 						char *buf)
 {
-	time_t nongc_ctime;
+	time64_t nongc_ctime;
 
 	down_read(&nilfs->ns_segctor_sem);
 	nongc_ctime = nilfs->ns_nongc_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			(unsigned long long)nongc_ctime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", nongc_ctime);
 }
 
 static ssize_t
@@ -728,7 +727,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
 				     struct the_nilfs *nilfs,
 				     char *buf)
 {
-	time_t sbwtime;
+	time64_t sbwtime;
 
 	down_read(&nilfs->ns_sem);
 	sbwtime = nilfs->ns_sbwtime;
@@ -742,13 +741,13 @@ nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
 					 struct the_nilfs *nilfs,
 					 char *buf)
 {
-	time_t sbwtime;
+	time64_t sbwtime;
 
 	down_read(&nilfs->ns_sem);
 	sbwtime = nilfs->ns_sbwtime;
 	up_read(&nilfs->ns_sem);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+	return snprintf(buf, PAGE_SIZE, "%llu\n", sbwtime);
 }
 
 static ssize_t
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 883d732b0259..36da1779f976 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -116,7 +116,7 @@ struct the_nilfs {
 	 */
 	struct buffer_head     *ns_sbh[2];
 	struct nilfs_super_block *ns_sbp[2];
-	time_t			ns_sbwtime;
+	time64_t		ns_sbwtime;
 	unsigned int		ns_sbwcount;
 	unsigned int		ns_sbsize;
 	unsigned int		ns_mount_state;
@@ -131,8 +131,8 @@ struct the_nilfs {
 	__u64			ns_nextnum;
 	unsigned long		ns_pseg_offset;
 	__u64			ns_cno;
-	time_t			ns_ctime;
-	time_t			ns_nongc_ctime;
+	time64_t		ns_ctime;
+	time64_t		ns_nongc_ctime;
 	atomic_t		ns_ndirtyblks;
 
 	/*
@@ -267,7 +267,7 @@ struct nilfs_root {
 
 static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
 {
-	u64 t = get_seconds();
+	u64 t = ktime_get_real_seconds();
 
 	return t < nilfs->ns_sbwtime ||
 		t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
-- 
cgit 


From b0cd38c7f54c9176f78cce2e2b23e1513bf5a224 Mon Sep 17 00:00:00 2001
From: "Ernesto A. Fernandez" <ernesto.mnd.fernandez@gmail.com>
Date: Tue, 6 Feb 2018 15:49:02 -0800
Subject: hfsplus: honor setgid flag on directories

When creating a file inside a directory that has the setgid flag set, give
the new file the group ID of the parent, and also the setgid flag if it is
a directory itself.

Link: http://lkml.kernel.org/r/20171204192705.GA6101@debian.home
Signed-off-by: Ernesto A. Fernandez <ernesto.mnd.fernandez@gmail.com>
Reviewed-by: Vyacheslav Dubeyko <slava@dubeyko.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/dir.c        | 4 ++--
 fs/hfsplus/hfsplus_fs.h | 3 ++-
 fs/hfsplus/inode.c      | 7 +++----
 fs/hfsplus/super.c      | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e8120a282435..15e06fb552da 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -444,7 +444,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
 	int res = -ENOMEM;
 
 	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
+	inode = hfsplus_new_inode(dir->i_sb, dir, S_IFLNK | S_IRWXUGO);
 	if (!inode)
 		goto out;
 
@@ -486,7 +486,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
 	int res = -ENOMEM;
 
 	mutex_lock(&sbi->vh_mutex);
-	inode = hfsplus_new_inode(dir->i_sb, mode);
+	inode = hfsplus_new_inode(dir->i_sb, dir, mode);
 	if (!inode)
 		goto out;
 
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a015044daa05..d9255abafb81 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -478,7 +478,8 @@ extern const struct address_space_operations hfsplus_aops;
 extern const struct address_space_operations hfsplus_btree_aops;
 extern const struct dentry_operations hfsplus_dentry_operations;
 
-struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode);
+struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
+				umode_t mode);
 void hfsplus_delete_inode(struct inode *inode);
 void hfsplus_inode_read_fork(struct inode *inode,
 			     struct hfsplus_fork_raw *fork);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 190c60efbc99..c0c8d433864f 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -354,7 +354,8 @@ static const struct file_operations hfsplus_file_operations = {
 	.unlocked_ioctl = hfsplus_ioctl,
 };
 
-struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
+struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
+				umode_t mode)
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
 	struct inode *inode = new_inode(sb);
@@ -364,9 +365,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
 		return NULL;
 
 	inode->i_ino = sbi->next_cnid++;
-	inode->i_mode = mode;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
+	inode_init_owner(inode, dir, mode);
 	set_nlink(inode, 1);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 1d458b716957..513c357c734b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -549,7 +549,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 
 		if (!sbi->hidden_dir) {
 			mutex_lock(&sbi->vh_mutex);
-			sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
+			sbi->hidden_dir = hfsplus_new_inode(sb, root, S_IFDIR);
 			if (!sbi->hidden_dir) {
 				mutex_unlock(&sbi->vh_mutex);
 				err = -ENOMEM;
-- 
cgit 


From 1a60e4d516defb5808967a8b42ac6675a3f05085 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Tue, 6 Feb 2018 15:40:59 -0800
Subject: vfs: remove might_sleep() from clear_inode()

Commit 7994e6f72543 ("vfs: Move waiting for inode writeback from
end_writeback() to evict_inode()") removed inode_sync_wait() from
end_writeback() and commit dbd5768f87ff ("vfs: Rename end_writeback() to
clear_inode()") renamed end_writeback() to clear_inode().

After these patches there is no sleeping operation in clear_inode().
So, remove might_sleep() from it.

Link: http://lkml.kernel.org/r/20171108004354.40308-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index e2ca0f4b5151..ef362364d396 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -498,7 +498,6 @@ EXPORT_SYMBOL(__remove_inode_hash);
 
 void clear_inode(struct inode *inode)
 {
-	might_sleep();
 	/*
 	 * We have to cycle tree_lock here because reclaim can be still in the
 	 * process of removing the last page (in __delete_from_page_cache())
-- 
cgit 


From 4c2e4befb3cc9ce42d506aa537c9ab504723e98c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:41:45 -0800
Subject: pipe, sysctl: drop 'min' parameter from pipe-max-size converter

Patch series "pipe: buffer limits fixes and cleanups", v2.

This series simplifies the sysctl handler for pipe-max-size and fixes
another set of bugs related to the pipe buffer limits:

- The root user wasn't allowed to exceed the limits when creating new
  pipes.

- There was an off-by-one error when checking the limits, so a limit of
  N was actually treated as N - 1.

- F_SETPIPE_SZ accepted values over UINT_MAX.

- Reading the pipe buffer limits could be racy.

This patch (of 7):

Before validating the given value against pipe_min_size,
do_proc_dopipe_max_size_conv() calls round_pipe_size(), which rounds the
value up to pipe_min_size.  Therefore, the second check against
pipe_min_size is redundant.  Remove it.

Link: http://lkml.kernel.org/r/20180111052902.14409-2-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index a449ca0ec0c6..1f6ec1051e59 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -35,11 +35,6 @@
  */
 unsigned int pipe_max_size = 1048576;
 
-/*
- * Minimum pipe size, as required by POSIX
- */
-unsigned int pipe_min_size = PAGE_SIZE;
-
 /* Maximum allocatable pages per user. Hard limit is unset by default, soft
  * matches default values.
  */
@@ -1024,8 +1019,9 @@ unsigned int round_pipe_size(unsigned int size)
 {
 	unsigned long nr_pages;
 
-	if (size < pipe_min_size)
-		size = pipe_min_size;
+	/* Minimum pipe size, as required by POSIX */
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
 
 	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (nr_pages == 0)
-- 
cgit 


From 319e0a21bb7823abbb4818fe2724e572bbac77a2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:41:49 -0800
Subject: pipe, sysctl: remove pipe_proc_fn()

pipe_proc_fn() is no longer needed, as it only calls through to
proc_dopipe_max_size().  Just put proc_dopipe_max_size() in the ctl_table
entry directly, and remove the unneeded EXPORT_SYMBOL() and the ENOSYS
stub for it.

(The reason the ENOSYS stub isn't needed is that the pipe-max-size
ctl_table entry is located directly in 'kern_table' rather than being
registered separately.  Therefore, the entry is already only defined when
the kernel is built with sysctl support.)

Link: http://lkml.kernel.org/r/20180111052902.14409-3-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 1f6ec1051e59..61adbf0f5b53 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1120,16 +1120,6 @@ out_revert_acct:
 	return ret;
 }
 
-/*
- * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size
- * will return an error.
- */
-int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
-		 size_t *lenp, loff_t *ppos)
-{
-	return proc_dopipe_max_size(table, write, buf, lenp, ppos);
-}
-
 /*
  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
  * location, so checking ->i_pipe is not enough to verify that this is a
-- 
cgit 


From 85c2dd5473b2718b4b63e74bfeb1ca876868e11f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:41:53 -0800
Subject: pipe: actually allow root to exceed the pipe buffer limits

pipe-user-pages-hard and pipe-user-pages-soft are only supposed to apply
to unprivileged users, as documented in both Documentation/sysctl/fs.txt
and the pipe(7) man page.

However, the capabilities are actually only checked when increasing a
pipe's size using F_SETPIPE_SZ, not when creating a new pipe.  Therefore,
if pipe-user-pages-hard has been set, the root user can run into it and be
unable to create pipes.  Similarly, if pipe-user-pages-soft has been set,
the root user can run into it and have their pipes limited to 1 page each.

Fix this by allowing the privileged override in both cases.

Link: http://lkml.kernel.org/r/20180111052902.14409-4-ebiggers3@gmail.com
Fixes: 759c01142a5d ("pipe: limit the per-user amount of pages allocated in pipes")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 61adbf0f5b53..04acfad4692b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -613,6 +613,11 @@ static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
 	return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard;
 }
 
+static bool is_unprivileged_user(void)
+{
+	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+}
+
 struct pipe_inode_info *alloc_pipe_info(void)
 {
 	struct pipe_inode_info *pipe;
@@ -629,12 +634,12 @@ struct pipe_inode_info *alloc_pipe_info(void)
 
 	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
-	if (too_many_pipe_buffers_soft(user_bufs)) {
+	if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
 		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
 		pipe_bufs = 1;
 	}
 
-	if (too_many_pipe_buffers_hard(user_bufs))
+	if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
 		goto out_revert_acct;
 
 	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
@@ -1065,7 +1070,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	if (nr_pages > pipe->buffers &&
 			(too_many_pipe_buffers_hard(user_bufs) ||
 			 too_many_pipe_buffers_soft(user_bufs)) &&
-			!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+			is_unprivileged_user()) {
 		ret = -EPERM;
 		goto out_revert_acct;
 	}
-- 
cgit 


From 9903a91c763ecdae333a04a9d89d79d2b8966503 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:41:56 -0800
Subject: pipe: fix off-by-one error when checking buffer limits

With pipe-user-pages-hard set to 'N', users were actually only allowed up
to 'N - 1' buffers; and likewise for pipe-user-pages-soft.

Fix this to allow up to 'N' buffers, as would be expected.

Link: http://lkml.kernel.org/r/20180111052902.14409-5-ebiggers3@gmail.com
Fixes: b0b91d18e2e9 ("pipe: fix limit checking in pipe_set_size()")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Willy Tarreau <w@1wt.eu>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 04acfad4692b..46c30ac777da 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -605,12 +605,12 @@ static unsigned long account_pipe_buffers(struct user_struct *user,
 
 static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
 {
-	return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft;
+	return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft;
 }
 
 static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
 {
-	return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard;
+	return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard;
 }
 
 static bool is_unprivileged_user(void)
-- 
cgit 


From 96e99be40e4cff870a83233731121ec0f7f95075 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:42:00 -0800
Subject: pipe: reject F_SETPIPE_SZ with size over UINT_MAX

A pipe's size is represented as an 'unsigned int'.  As expected, writing a
value greater than UINT_MAX to /proc/sys/fs/pipe-max-size fails with
EINVAL.  However, the F_SETPIPE_SZ fcntl silently truncates such values to
32 bits, rather than failing with EINVAL as expected.  (It *does* fail
with EINVAL for values above (1 << 31) but <= UINT_MAX.)

Fix this by moving the check against UINT_MAX into round_pipe_size() which
is called in both cases.

Link: http://lkml.kernel.org/r/20180111052902.14409-6-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 46c30ac777da..817393d36244 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1020,10 +1020,13 @@ const struct file_operations pipefifo_fops = {
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-unsigned int round_pipe_size(unsigned int size)
+unsigned int round_pipe_size(unsigned long size)
 {
 	unsigned long nr_pages;
 
+	if (size > UINT_MAX)
+		return 0;
+
 	/* Minimum pipe size, as required by POSIX */
 	if (size < PAGE_SIZE)
 		size = PAGE_SIZE;
-- 
cgit 


From c4fed5a91fadc8a277b1eda474317b501651dd3e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:42:05 -0800
Subject: pipe: simplify round_pipe_size()

round_pipe_size() calculates the number of pages the requested size
corresponds to, then rounds the page count up to the next power of 2.

However, it also rounds everything < PAGE_SIZE up to PAGE_SIZE.
Therefore, there's no need to actually translate the size into a page
count; we just need to round the size up to the next power of 2.

We do need to verify the size isn't greater than (1 << 31), since on
32-bit systems roundup_pow_of_two() would be undefined in that case.  But
that can just be combined with the UINT_MAX check which we need anyway
now.

Finally, update pipe_set_size() to not redundantly check the return value
of round_pipe_size() for the "invalid size" case twice.

Link: http://lkml.kernel.org/r/20180111052902.14409-7-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 817393d36244..8be52158c400 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1022,20 +1022,14 @@ const struct file_operations pipefifo_fops = {
  */
 unsigned int round_pipe_size(unsigned long size)
 {
-	unsigned long nr_pages;
-
-	if (size > UINT_MAX)
+	if (size > (1U << 31))
 		return 0;
 
 	/* Minimum pipe size, as required by POSIX */
 	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
-
-	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages == 0)
-		return 0;
+		return PAGE_SIZE;
 
-	return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+	return roundup_pow_of_two(size);
 }
 
 /*
@@ -1050,8 +1044,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	long ret = 0;
 
 	size = round_pipe_size(arg);
-	if (size == 0)
-		return -EINVAL;
 	nr_pages = size >> PAGE_SHIFT;
 
 	if (!nr_pages)
-- 
cgit 


From f7340761812fc10313e6fcc115e0bc4f7a799112 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 6 Feb 2018 15:42:08 -0800
Subject: pipe: read buffer limits atomically

The pipe buffer limits are accessed without any locking, and may be
changed at any time by the sysctl handlers.  In theory this could cause
problems for expressions like the following:

    pipe_user_pages_hard && user_bufs > pipe_user_pages_hard

...  since the assembly code might reference the 'pipe_user_pages_hard'
memory location multiple times, and if the admin removes the limit by
setting it to 0, there is a very brief window where processes could
incorrectly observe the limit to be exceeded.

Fix this by loading the limits with READ_ONCE() prior to use.

Link: http://lkml.kernel.org/r/20180111052902.14409-8-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Luis R . Rodriguez" <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 8be52158c400..0913aed7fd0d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -605,12 +605,16 @@ static unsigned long account_pipe_buffers(struct user_struct *user,
 
 static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
 {
-	return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft;
+	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
+
+	return soft_limit && user_bufs > soft_limit;
 }
 
 static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
 {
-	return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard;
+	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
+
+	return hard_limit && user_bufs > hard_limit;
 }
 
 static bool is_unprivileged_user(void)
@@ -624,13 +628,14 @@ struct pipe_inode_info *alloc_pipe_info(void)
 	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
 	struct user_struct *user = get_current_user();
 	unsigned long user_bufs;
+	unsigned int max_size = READ_ONCE(pipe_max_size);
 
 	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
 	if (pipe == NULL)
 		goto out_free_uid;
 
-	if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE))
-		pipe_bufs = pipe_max_size >> PAGE_SHIFT;
+	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
+		pipe_bufs = max_size >> PAGE_SHIFT;
 
 	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
-- 
cgit