From bfca7a6f0c75f5d97f5efc2c80cca55bdbf5f79a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Mar 2023 07:15:57 -0500
Subject: lockd: purge resources held on behalf of nlm clients when shutting
 down

It's easily possible for the server to have an outstanding lock when we
go to shut down. When that happens, we often get a warning like this in
the kernel log:

    lockd: couldn't shutdown host module for net f0000000!

This is because the shutdown procedures skip removing any hosts that
still have outstanding resources (locks). Eventually, things seem to get
cleaned up anyway, but the log message is unsettling, and server
shutdown doesn't seem to be working the way it was intended.

Ensure that we tear down any resources held on behalf of a client when
tearing one down for server shutdown.

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/host.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/lockd')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index cdc8e12cdac4..127a728fcbc8 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -629,6 +629,7 @@ nlm_shutdown_hosts_net(struct net *net)
 			rpc_shutdown_client(host->h_rpcclnt);
 			host->h_rpcclnt = NULL;
 		}
+		nlmsvc_free_host_resources(host);
 	}
 
 	/* Then, perform a garbage collection pass */
-- 
cgit 


From f0aa4852e63f9c1cfd4322c770e69d7e6817e906 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Mar 2023 07:15:59 -0500
Subject: lockd: move struct nlm_wait to lockd.h

The next patch needs struct nlm_wait in fs/lockd/clntproc.c, so move
the definition to a shared header file. As an added clean-up, drop
the unused b_reclaim field.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/clntlock.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'fs/lockd')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 82b19a30e0f0..464cb15c1a06 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -29,18 +29,6 @@ static int			reclaimer(void *ptr);
  * client perspective.
  */
 
-/*
- * This is the representation of a blocked client lock.
- */
-struct nlm_wait {
-	struct list_head	b_list;		/* linked list */
-	wait_queue_head_t	b_wait;		/* where to wait on */
-	struct nlm_host *	b_host;
-	struct file_lock *	b_lock;		/* local file lock */
-	unsigned short		b_reclaim;	/* got to reclaim lock */
-	__be32			b_status;	/* grant callback status */
-};
-
 static LIST_HEAD(nlm_blocked);
 static DEFINE_SPINLOCK(nlm_blocked_lock);
 
-- 
cgit 


From 2005f5b9c35bd736c81e9f24f5c5051967c022ee Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Mar 2023 07:16:00 -0500
Subject: lockd: fix races in client GRANTED_MSG wait logic

After the wait for a grant is done (for whatever reason), nlmclnt_block
updates the status of the nlm_rqst with the status of the block. At the
point it does this, however, the block is still queued its status could
change at any time.

This is particularly a problem when the waiting task is signaled during
the wait. We can end up giving up on the lock just before the GRANTED_MSG
callback comes in, and accept it even though the lock request gets back
an error, leaving a dangling lock on the server.

Since the nlm_wait never lives beyond the end of nlmclnt_lock, put it on
the stack and add functions to allow us to enqueue and dequeue the
block. Enqueue it just before the lock/wait loop, and dequeue it
just after we exit the loop instead of waiting until the end of
the function. Also, scrape the status at the time that we dequeue it to
ensure that it's final.

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/clntlock.c | 42 +++++++++++++++++++++---------------------
 fs/lockd/clntproc.c | 28 ++++++++++++++++++----------
 2 files changed, 39 insertions(+), 31 deletions(-)

(limited to 'fs/lockd')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 464cb15c1a06..c374ee072db3 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -82,41 +82,42 @@ void nlmclnt_done(struct nlm_host *host)
 }
 EXPORT_SYMBOL_GPL(nlmclnt_done);
 
+void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl)
+{
+	block->b_host = host;
+	block->b_lock = fl;
+	init_waitqueue_head(&block->b_wait);
+	block->b_status = nlm_lck_blocked;
+}
+
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
+void nlmclnt_queue_block(struct nlm_wait *block)
 {
-	struct nlm_wait *block;
-
-	block = kmalloc(sizeof(*block), GFP_KERNEL);
-	if (block != NULL) {
-		block->b_host = host;
-		block->b_lock = fl;
-		init_waitqueue_head(&block->b_wait);
-		block->b_status = nlm_lck_blocked;
-
-		spin_lock(&nlm_blocked_lock);
-		list_add(&block->b_list, &nlm_blocked);
-		spin_unlock(&nlm_blocked_lock);
-	}
-	return block;
+	spin_lock(&nlm_blocked_lock);
+	list_add(&block->b_list, &nlm_blocked);
+	spin_unlock(&nlm_blocked_lock);
 }
 
-void nlmclnt_finish_block(struct nlm_wait *block)
+/*
+ * Dequeue the block and return its final status
+ */
+__be32 nlmclnt_dequeue_block(struct nlm_wait *block)
 {
-	if (block == NULL)
-		return;
+	__be32 status;
+
 	spin_lock(&nlm_blocked_lock);
 	list_del(&block->b_list);
+	status = block->b_status;
 	spin_unlock(&nlm_blocked_lock);
-	kfree(block);
+	return status;
 }
 
 /*
  * Block on a lock
  */
-int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
+int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
 	long ret;
 
@@ -142,7 +143,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 	/* Reset the lock status after a server reboot so we resend */
 	if (block->b_status == nlm_lck_denied_grace_period)
 		block->b_status = nlm_lck_blocked;
-	req->a_res.status = block->b_status;
 	return 0;
 }
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 16b4de868cd2..a14c9110719c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -516,9 +516,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	const struct cred *cred = nfs_file_cred(fl->fl_file);
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	struct nlm_wait *block = NULL;
+	struct nlm_wait block;
 	unsigned char fl_flags = fl->fl_flags;
 	unsigned char fl_type;
+	__be32 b_status;
 	int status = -ENOLCK;
 
 	if (nsm_monitor(host) < 0)
@@ -531,31 +532,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	if (status < 0)
 		goto out;
 
-	block = nlmclnt_prepare_block(host, fl);
+	nlmclnt_prepare_block(&block, host, fl);
 again:
 	/*
 	 * Initialise resp->status to a valid non-zero value,
 	 * since 0 == nlm_lck_granted
 	 */
 	resp->status = nlm_lck_blocked;
-	for(;;) {
+
+	/*
+	 * A GRANTED callback can come at any time -- even before the reply
+	 * to the LOCK request arrives, so we queue the wait before
+	 * requesting the lock.
+	 */
+	nlmclnt_queue_block(&block);
+	for (;;) {
 		/* Reboot protection */
 		fl->fl_u.nfs_fl.state = host->h_state;
 		status = nlmclnt_call(cred, req, NLMPROC_LOCK);
 		if (status < 0)
 			break;
 		/* Did a reclaimer thread notify us of a server reboot? */
-		if (resp->status ==  nlm_lck_denied_grace_period)
+		if (resp->status == nlm_lck_denied_grace_period)
 			continue;
 		if (resp->status != nlm_lck_blocked)
 			break;
 		/* Wait on an NLM blocking lock */
-		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+		status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT);
 		if (status < 0)
 			break;
-		if (resp->status != nlm_lck_blocked)
+		if (block.b_status != nlm_lck_blocked)
 			break;
 	}
+	b_status = nlmclnt_dequeue_block(&block);
+	if (resp->status == nlm_lck_blocked)
+		resp->status = b_status;
 
 	/* if we were interrupted while blocking, then cancel the lock request
 	 * and exit
@@ -564,7 +575,7 @@ again:
 		if (!req->a_args.block)
 			goto out_unlock;
 		if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
-			goto out_unblock;
+			goto out;
 	}
 
 	if (resp->status == nlm_granted) {
@@ -593,8 +604,6 @@ again:
 		status = -ENOLCK;
 	else
 		status = nlm_stat_to_errno(resp->status);
-out_unblock:
-	nlmclnt_finish_block(block);
 out:
 	nlmclnt_release_call(req);
 	return status;
@@ -602,7 +611,6 @@ out_unlock:
 	/* Fatal error: ensure that we remove the lock altogether */
 	dprintk("lockd: lock attempt ended in fatal error.\n"
 		"       Attempting to unlock.\n");
-	nlmclnt_finish_block(block);
 	fl_type = fl->fl_type;
 	fl->fl_type = F_UNLCK;
 	down_read(&host->h_rwsem);
-- 
cgit 


From 244cc19196d2f6691d34e8cd9bc34a55e4f778e5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Mar 2023 07:16:01 -0500
Subject: lockd: server should unlock lock if client rejects the grant

Currently lockd just dequeues the block and ignores it if the client
sends a GRANT_RES with a status of nlm_lck_denied. That status is an
indicator that the client has rejected the lock, so the right thing to
do is to unlock the lock we were trying to grant.

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svclock.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'fs/lockd')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4e30f3c50970..c43ccdf28ed9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -954,19 +954,32 @@ void
 nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
 {
 	struct nlm_block	*block;
+	struct file_lock	*fl;
+	int			error;
 
 	dprintk("grant_reply: looking for cookie %x, s=%d \n",
 		*(unsigned int *)(cookie->data), status);
 	if (!(block = nlmsvc_find_block(cookie)))
 		return;
 
-	if (status == nlm_lck_denied_grace_period) {
+	switch (status) {
+	case nlm_lck_denied_grace_period:
 		/* Try again in a couple of seconds */
 		nlmsvc_insert_block(block, 10 * HZ);
-	} else {
+		break;
+	case nlm_lck_denied:
+		/* Client doesn't want it, just unlock it */
+		nlmsvc_unlink_block(block);
+		fl = &block->b_call->a_args.lock.fl;
+		fl->fl_type = F_UNLCK;
+		error = vfs_lock_file(fl->fl_file, F_SETLK, fl, NULL);
+		if (error)
+			pr_warn("lockd: unable to unlock lock rejected by client!\n");
+		break;
+	default:
 		/*
-		 * Lock is now held by client, or has been rejected.
-		 * In both cases, the block should be removed.
+		 * Either it was accepted or the status makes no sense
+		 * just unlink it either way.
 		 */
 		nlmsvc_unlink_block(block);
 	}
-- 
cgit 


From 2f90e18ffec47c265c14e313047189b79784bc0e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Mar 2023 07:16:03 -0500
Subject: lockd: add some client-side tracepoints

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/Makefile   |   6 ++-
 fs/lockd/clntlock.c |   4 ++
 fs/lockd/clntproc.c |  14 +++++++
 fs/lockd/trace.c    |   3 ++
 fs/lockd/trace.h    | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 131 insertions(+), 2 deletions(-)
 create mode 100644 fs/lockd/trace.c
 create mode 100644 fs/lockd/trace.h

(limited to 'fs/lockd')

diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 6d5e83ed4476..ac9f9d84510e 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -3,10 +3,12 @@
 # Makefile for the linux lock manager stuff
 #
 
+ccflags-y += -I$(src)			# needed for trace events
+
 obj-$(CONFIG_LOCKD) += lockd.o
 
-lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
-	        svcshare.o svcproc.o svcsubs.o mon.o xdr.o
+lockd-objs-y += clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
+	        svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o
 lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
 lockd-objs-$(CONFIG_PROC_FS) += procfs.o
 lockd-objs		      := $(lockd-objs-y)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index c374ee072db3..e3972aa3045a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,9 +14,12 @@
 #include <linux/nfs_fs.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
 #include <linux/lockd/lockd.h>
 #include <linux/kthread.h>
 
+#include "trace.h"
+
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 /*
@@ -186,6 +189,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 		res = nlm_granted;
 	}
 	spin_unlock(&nlm_blocked_lock);
+	trace_nlmclnt_grant(lock, addr, svc_addr_len(addr), res);
 	return res;
 }
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a14c9110719c..fba6c7fa7474 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -20,6 +20,8 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 
+#include "trace.h"
+
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
 #define NLMCLNT_POLL_TIMEOUT	(30*HZ)
@@ -451,6 +453,9 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 			status = nlm_stat_to_errno(req->a_res.status);
 	}
 out:
+	trace_nlmclnt_test(&req->a_args.lock,
+			   (const struct sockaddr *)&req->a_host->h_addr,
+			   req->a_host->h_addrlen, req->a_res.status);
 	nlmclnt_release_call(req);
 	return status;
 }
@@ -605,10 +610,16 @@ again:
 	else
 		status = nlm_stat_to_errno(resp->status);
 out:
+	trace_nlmclnt_lock(&req->a_args.lock,
+			   (const struct sockaddr *)&req->a_host->h_addr,
+			   req->a_host->h_addrlen, req->a_res.status);
 	nlmclnt_release_call(req);
 	return status;
 out_unlock:
 	/* Fatal error: ensure that we remove the lock altogether */
+	trace_nlmclnt_lock(&req->a_args.lock,
+			   (const struct sockaddr *)&req->a_host->h_addr,
+			   req->a_host->h_addrlen, req->a_res.status);
 	dprintk("lockd: lock attempt ended in fatal error.\n"
 		"       Attempting to unlock.\n");
 	fl_type = fl->fl_type;
@@ -704,6 +715,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	/* What to do now? I'm out of my depth... */
 	status = -ENOLCK;
 out:
+	trace_nlmclnt_unlock(&req->a_args.lock,
+			     (const struct sockaddr *)&req->a_host->h_addr,
+			     req->a_host->h_addrlen, req->a_res.status);
 	nlmclnt_release_call(req);
 	return status;
 }
diff --git a/fs/lockd/trace.c b/fs/lockd/trace.c
new file mode 100644
index 000000000000..d9a6ff6e673c
--- /dev/null
+++ b/fs/lockd/trace.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h
new file mode 100644
index 000000000000..7461b13b6e74
--- /dev/null
+++ b/fs/lockd/trace.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockd
+
+#if !defined(_TRACE_LOCKD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKD_H
+
+#include <linux/tracepoint.h>
+#include <linux/crc32.h>
+#include <linux/nfs.h>
+#include <linux/lockd/lockd.h>
+
+#ifdef CONFIG_LOCKD_V4
+#define NLM_STATUS_LIST					\
+	nlm_status_code(LCK_GRANTED)			\
+	nlm_status_code(LCK_DENIED)			\
+	nlm_status_code(LCK_DENIED_NOLOCKS)		\
+	nlm_status_code(LCK_BLOCKED)			\
+	nlm_status_code(LCK_DENIED_GRACE_PERIOD)	\
+	nlm_status_code(DEADLCK)			\
+	nlm_status_code(ROFS)				\
+	nlm_status_code(STALE_FH)			\
+	nlm_status_code(FBIG)				\
+	nlm_status_code_end(FAILED)
+#else
+#define NLM_STATUS_LIST					\
+	nlm_status_code(LCK_GRANTED)			\
+	nlm_status_code(LCK_DENIED)			\
+	nlm_status_code(LCK_DENIED_NOLOCKS)		\
+	nlm_status_code(LCK_BLOCKED)			\
+	nlm_status_code_end(LCK_DENIED_GRACE_PERIOD)
+#endif
+
+#undef nlm_status_code
+#undef nlm_status_code_end
+#define nlm_status_code(x)	TRACE_DEFINE_ENUM(NLM_##x);
+#define nlm_status_code_end(x)	TRACE_DEFINE_ENUM(NLM_##x);
+
+NLM_STATUS_LIST
+
+#undef nlm_status_code
+#undef nlm_status_code_end
+#define nlm_status_code(x)	{ NLM_##x, #x },
+#define nlm_status_code_end(x)	{ NLM_##x, #x }
+
+#define show_nlm_status(x)	__print_symbolic(x, NLM_STATUS_LIST)
+
+DECLARE_EVENT_CLASS(nlmclnt_lock_event,
+		TP_PROTO(
+			const struct nlm_lock *lock,
+			const struct sockaddr *addr,
+			unsigned int addrlen,
+			__be32 status
+		),
+
+		TP_ARGS(lock, addr, addrlen, status),
+
+		TP_STRUCT__entry(
+			__field(u32, oh)
+			__field(u32, svid)
+			__field(u32, fh)
+			__field(unsigned long, status)
+			__field(u64, start)
+			__field(u64, len)
+			__sockaddr(addr, addrlen)
+		),
+
+		TP_fast_assign(
+			__entry->oh = ~crc32_le(0xffffffff, lock->oh.data, lock->oh.len);
+			__entry->svid = lock->svid;
+			__entry->fh = nfs_fhandle_hash(&lock->fh);
+			__entry->start = lock->lock_start;
+			__entry->len = lock->lock_len;
+			__entry->status = be32_to_cpu(status);
+			__assign_sockaddr(addr, addr, addrlen);
+		),
+
+		TP_printk(
+			"addr=%pISpc oh=0x%08x svid=0x%08x fh=0x%08x start=%llu len=%llu status=%s",
+			__get_sockaddr(addr), __entry->oh, __entry->svid,
+			__entry->fh, __entry->start, __entry->len,
+			show_nlm_status(__entry->status)
+		)
+);
+
+#define DEFINE_NLMCLNT_EVENT(name)				\
+	DEFINE_EVENT(nlmclnt_lock_event, name,			\
+			TP_PROTO(				\
+				const struct nlm_lock *lock,	\
+				const struct sockaddr *addr,	\
+				unsigned int addrlen,		\
+				__be32	status			\
+			),					\
+			TP_ARGS(lock, addr, addrlen, status))
+
+DEFINE_NLMCLNT_EVENT(nlmclnt_test);
+DEFINE_NLMCLNT_EVENT(nlmclnt_lock);
+DEFINE_NLMCLNT_EVENT(nlmclnt_unlock);
+DEFINE_NLMCLNT_EVENT(nlmclnt_grant);
+
+#endif /* _TRACE_LOCKD_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
-- 
cgit