From 9e8925b67a809bb27ce4b7d352d67f25cf1d7fc5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Mon, 16 Nov 2015 09:49:34 -0500
Subject: locks: Allow disabling mandatory locking at compile time

Mandatory locking appears to be almost unused and buggy and there
appears no real interest in doing anything with it.  Since effectively
no one uses the code and since the code is buggy let's allow it to be
disabled at compile time.  I would just suggest removing the code but
undoubtedly that will break some piece of userspace code somewhere.

For the distributions that don't care about this piece of code
this gives a nice starting point to make mandatory locking go away.

Cc: Benjamin Coddington <bcodding@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jeff Layton <jeff.layton@primarydata.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 0d2b3267e2a3..86c94674ab22 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1191,6 +1191,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 	return error;
 }
 
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
 /**
  * locks_mandatory_locked - Check for an active lock
  * @file: the file to check
@@ -1289,6 +1290,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 }
 
 EXPORT_SYMBOL(locks_mandatory_area);
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
 
 static void lease_clear_pending(struct file_lock *fl, int arg)
 {
-- 
cgit 


From 8ace5dfb983e89dbcfcb42ff25df6e4240c555bb Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@163.com>
Date: Wed, 18 Nov 2015 21:40:33 +0800
Subject: locks: use list_first_entry_or_null()

Simplify the code with list_first_entry_or_null().

Signed-off-by: Geliang Tang <geliangtang@163.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 86c94674ab22..d2ee8e365ba7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1505,12 +1505,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		spin_lock(&ctx->flc_lock);
-		if (!list_empty(&ctx->flc_lease)) {
-			fl = list_first_entry(&ctx->flc_lease,
-						struct file_lock, fl_list);
-			if (fl->fl_type == F_WRLCK)
-				has_lease = true;
-		}
+		fl = list_first_entry_or_null(&ctx->flc_lease,
+					      struct file_lock, fl_list);
+		if (fl && (fl->fl_type == F_WRLCK))
+			has_lease = true;
 		spin_unlock(&ctx->flc_lock);
 	}
 
-- 
cgit 


From 9189922675ecca0fab38931d86b676e9d79602dc Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 17 Dec 2015 14:11:03 -0500
Subject: fs: make locks.c explicitly non-modular

The Kconfig currently controlling compilation of this code is:

config FILE_LOCKING
     bool "Enable POSIX file locking API" if EXPERT

...meaning that it currently is not being built as a module by anyone.

Lets remove the couple traces of modularity so that when reading the
driver there is no doubt it is builtin-only.

Since module_init translates to device_initcall in the non-modular
case, the init ordering gets bumped to one level earlier when we
use the more appropriate fs_initcall here.  However we've made similar
changes before without any fallout and none is expected here either.

Cc: Jeff Layton <jlayton@poochiereds.net>
Acked-by: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/locks.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index d2ee8e365ba7..593dca300b29 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -119,7 +119,6 @@
 #include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
@@ -2706,7 +2705,7 @@ static int __init proc_locks_init(void)
 	proc_create("locks", 0, NULL, &proc_locks_operations);
 	return 0;
 }
-module_init(proc_locks_init);
+fs_initcall(proc_locks_init);
 #endif
 
 static int __init filelock_init(void)
-- 
cgit 


From 7f3697e24dc3820b10f445a4a7d914fc356012d1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Thu, 7 Jan 2016 16:38:10 -0500
Subject: locks: fix unlock when fcntl_setlk races with a close

Dmitry reported that he was able to reproduce the WARN_ON_ONCE that
fires in locks_free_lock_context when the flc_posix list isn't empty.

The problem turns out to be that we're basically rebuilding the
file_lock from scratch in fcntl_setlk when we discover that the setlk
has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END,
then we may end up with fl_start and fl_end values that differ from
when the lock was initially set, if the file position or length of the
file has changed in the interim.

Fix this by just reusing the same lock request structure, and simply
override fl_type value with F_UNLCK as appropriate. That ensures that
we really are unlocking the lock that was initially set.

While we're there, make sure that we do pop a WARN_ON_ONCE if the
removal ever fails. Also return -EBADF in this event, since that's
what we would have returned if the close had happened earlier.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Fixes: c293621bbf67 (stale POSIX lock handling)
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 51 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 593dca300b29..c263aff793bc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2181,7 +2181,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2223,19 +2222,22 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	/*
-	 * we need that spin_lock here - it prevents reordering between
-	 * update of i_flctx->flc_posix and check for it done in close().
-	 * rcu_read_lock() wouldn't do.
-	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
@@ -2321,7 +2323,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 		goto out;
 	}
 
-again:
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
@@ -2363,14 +2364,22 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	spin_lock(&current->files->file_lock);
-	f = fcheck(fd);
-	spin_unlock(&current->files->file_lock);
-	if (!error && f != filp && flock.l_type != F_UNLCK) {
-		flock.l_type = F_UNLCK;
-		goto again;
+	if (!error && file_lock->fl_type != F_UNLCK) {
+		/*
+		 * We need that spin_lock here - it prevents reordering between
+		 * update of i_flctx->flc_posix and check for it done in
+		 * close(). rcu_read_lock() wouldn't do.
+		 */
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
+		if (f != filp) {
+			file_lock->fl_type = F_UNLCK;
+			error = do_lock_file_wait(filp, cmd, file_lock);
+			WARN_ON_ONCE(error);
+			error = -EBADF;
+		}
 	}
-
 out:
 	locks_free_lock(file_lock);
 	return error;
-- 
cgit 


From 0752ba807b04ccd69cb4bc8bbf829a80ee208a3c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Fri, 8 Jan 2016 07:30:43 -0500
Subject: locks: don't check for race with close when setting OFD lock

We don't clean out OFD locks on close(), so there's no need to check
for a race with them here. They'll get cleaned out at the same time
that flock locks are.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index c263aff793bc..e72077d5a664 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2219,10 +2219,12 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	if (!error && file_lock->fl_type != F_UNLCK) {
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
@@ -2361,10 +2363,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
-	 * Attempt to detect a close/fcntl race and recover by
-	 * releasing the lock that was just acquired.
+	 * Attempt to detect a close/fcntl race and recover by releasing the
+	 * lock that was just acquired. There is no need to do that when we're
+	 * unlocking though, or for OFD locks.
 	 */
-	if (!error && file_lock->fl_type != F_UNLCK) {
+	if (!error && file_lock->fl_type != F_UNLCK &&
+	    !(file_lock->fl_flags & FL_OFDLCK)) {
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
-- 
cgit 


From 1890910fd06fefbfa1cbeaf88357783914af71f6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Wed, 6 Jan 2016 21:26:10 -0500
Subject: locks: sprinkle some tracepoints around the file locking code

Add some tracepoints around the POSIX locking code. These were useful
when tracking down problems when handling the race between setlk and
close.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index e72077d5a664..0af2387bd91e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -229,6 +229,7 @@ locks_get_lock_context(struct inode *inode, int type)
 		ctx = smp_load_acquire(&inode->i_flctx);
 	}
 out:
+	trace_locks_get_lock_context(inode, type, ctx);
 	return ctx;
 }
 
@@ -1141,6 +1142,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 	if (new_fl2)
 		locks_free_lock(new_fl2);
 	locks_dispose_list(&dispose);
+	trace_posix_lock_inode(inode, request, error);
+
 	return error;
 }
 
@@ -2164,6 +2167,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (file_lock == NULL)
 		return -ENOLCK;
 
+	inode = file_inode(filp);
+
 	/*
 	 * This might block, so we do it before checking the inode.
 	 */
@@ -2171,8 +2176,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = file_inode(filp);
-
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -2241,6 +2244,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 		}
 	}
 out:
+	trace_fcntl_setlk(inode, file_lock, error);
 	locks_free_lock(file_lock);
 	return error;
 }
@@ -2397,6 +2401,7 @@ out:
  */
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
+	int error;
 	struct file_lock lock;
 	struct file_lock_context *ctx;
 
@@ -2419,10 +2424,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
 
-	vfs_lock_file(filp, F_SETLK, &lock, NULL);
+	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
+	trace_locks_remove_posix(file_inode(filp), &lock, error);
 }
 
 EXPORT_SYMBOL(locks_remove_posix);
-- 
cgit 


From f27a0fe083bf46fef0d7045aa2e9a7e56e72d8d8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Thu, 7 Jan 2016 15:08:51 -0500
Subject: locks: pass inode pointer to locks_free_lock_context

...so we can print information about it if there are leaked locks.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 0af2387bd91e..ed9ab930d093 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -234,8 +234,10 @@ out:
 }
 
 void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
 {
+	struct file_lock_context *ctx = inode->i_flctx;
+
 	if (ctx) {
 		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
 		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
-- 
cgit 


From e24dadab08a2a38455434607f52b54a6dc990721 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Wed, 6 Jan 2016 21:28:41 -0500
Subject: locks: prink more detail when there are leaked locks

Right now, we just get WARN_ON_ONCE, which is not particularly helpful.
Have it dump some info about the locks and the inode to make it easier
to track down leaked locks in the future.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index ed9ab930d093..ca272eb63c30 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -233,15 +233,40 @@ out:
 	return ctx;
 }
 
+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
+{
+	struct file_lock *fl;
+
+	list_for_each_entry(fl, list, fl_list) {
+		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+	}
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{
+	struct file_lock_context *ctx = inode->i_flctx;
+
+	if (unlikely(!list_empty(&ctx->flc_flock) ||
+		     !list_empty(&ctx->flc_posix) ||
+		     !list_empty(&ctx->flc_lease))) {
+		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+			inode->i_ino);
+		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+		locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+		locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+	}
+}
+
 void
 locks_free_lock_context(struct inode *inode)
 {
 	struct file_lock_context *ctx = inode->i_flctx;
 
-	if (ctx) {
-		WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
-		WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+	if (unlikely(ctx)) {
+		locks_check_ctx_lists(inode);
 		kmem_cache_free(flctx_cache, ctx);
 	}
 }
-- 
cgit 


From b4d629a39e104a8326d5b281ce07c21240c130c9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Thu, 7 Jan 2016 18:27:42 -0500
Subject: locks: rename __posix_lock_file to posix_lock_inode

...a more descriptive name and we can drop the double underscore prefix.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index ca272eb63c30..a91f4ab00a90 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -961,7 +961,8 @@ out:
 	return error;
 }
 
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+			    struct file_lock *conflock)
 {
 	struct file_lock *fl, *tmp;
 	struct file_lock *new_fl = NULL;
@@ -1191,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file(file_inode(filp), fl, conflock);
+	return posix_lock_inode(file_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1207,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 	int error;
 	might_sleep ();
 	for (;;) {
-		error = __posix_lock_file(inode, fl, NULL);
+		error = posix_lock_inode(inode, fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1290,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (filp) {
 			fl.fl_owner = filp;
 			fl.fl_flags &= ~FL_SLEEP;
-			error = __posix_lock_file(inode, &fl, NULL);
+			error = posix_lock_inode(inode, &fl, NULL);
 			if (!error)
 				break;
 		}
@@ -1298,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 		if (sleep)
 			fl.fl_flags |= FL_SLEEP;
 		fl.fl_owner = current->files;
-		error = __posix_lock_file(inode, &fl, NULL);
+		error = posix_lock_inode(inode, &fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;
 		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
-- 
cgit