From bfca7a6f0c75f5d97f5efc2c80cca55bdbf5f79a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:15:57 -0500 Subject: lockd: purge resources held on behalf of nlm clients when shutting down It's easily possible for the server to have an outstanding lock when we go to shut down. When that happens, we often get a warning like this in the kernel log: lockd: couldn't shutdown host module for net f0000000! This is because the shutdown procedures skip removing any hosts that still have outstanding resources (locks). Eventually, things seem to get cleaned up anyway, but the log message is unsettling, and server shutdown doesn't seem to be working the way it was intended. Ensure that we tear down any resources held on behalf of a client when tearing one down for server shutdown. Reported-by: Yongcheng Yang Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/host.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/lockd') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index cdc8e12cdac4..127a728fcbc8 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -629,6 +629,7 @@ nlm_shutdown_hosts_net(struct net *net) rpc_shutdown_client(host->h_rpcclnt); host->h_rpcclnt = NULL; } + nlmsvc_free_host_resources(host); } /* Then, perform a garbage collection pass */ -- cgit From f0aa4852e63f9c1cfd4322c770e69d7e6817e906 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:15:59 -0500 Subject: lockd: move struct nlm_wait to lockd.h The next patch needs struct nlm_wait in fs/lockd/clntproc.c, so move the definition to a shared header file. As an added clean-up, drop the unused b_reclaim field. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clntlock.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs/lockd') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 82b19a30e0f0..464cb15c1a06 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -29,18 +29,6 @@ static int reclaimer(void *ptr); * client perspective. */ -/* - * This is the representation of a blocked client lock. - */ -struct nlm_wait { - struct list_head b_list; /* linked list */ - wait_queue_head_t b_wait; /* where to wait on */ - struct nlm_host * b_host; - struct file_lock * b_lock; /* local file lock */ - unsigned short b_reclaim; /* got to reclaim lock */ - __be32 b_status; /* grant callback status */ -}; - static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -- cgit From 2005f5b9c35bd736c81e9f24f5c5051967c022ee Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:16:00 -0500 Subject: lockd: fix races in client GRANTED_MSG wait logic After the wait for a grant is done (for whatever reason), nlmclnt_block updates the status of the nlm_rqst with the status of the block. At the point it does this, however, the block is still queued its status could change at any time. This is particularly a problem when the waiting task is signaled during the wait. We can end up giving up on the lock just before the GRANTED_MSG callback comes in, and accept it even though the lock request gets back an error, leaving a dangling lock on the server. Since the nlm_wait never lives beyond the end of nlmclnt_lock, put it on the stack and add functions to allow us to enqueue and dequeue the block. Enqueue it just before the lock/wait loop, and dequeue it just after we exit the loop instead of waiting until the end of the function. Also, scrape the status at the time that we dequeue it to ensure that it's final. Reported-by: Yongcheng Yang Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clntlock.c | 42 +++++++++++++++++++++--------------------- fs/lockd/clntproc.c | 28 ++++++++++++++++++---------- 2 files changed, 39 insertions(+), 31 deletions(-) (limited to 'fs/lockd') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 464cb15c1a06..c374ee072db3 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -82,41 +82,42 @@ void nlmclnt_done(struct nlm_host *host) } EXPORT_SYMBOL_GPL(nlmclnt_done); +void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl) +{ + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = nlm_lck_blocked; +} + /* * Queue up a lock for blocking so that the GRANTED request can see it */ -struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl) +void nlmclnt_queue_block(struct nlm_wait *block) { - struct nlm_wait *block; - - block = kmalloc(sizeof(*block), GFP_KERNEL); - if (block != NULL) { - block->b_host = host; - block->b_lock = fl; - init_waitqueue_head(&block->b_wait); - block->b_status = nlm_lck_blocked; - - spin_lock(&nlm_blocked_lock); - list_add(&block->b_list, &nlm_blocked); - spin_unlock(&nlm_blocked_lock); - } - return block; + spin_lock(&nlm_blocked_lock); + list_add(&block->b_list, &nlm_blocked); + spin_unlock(&nlm_blocked_lock); } -void nlmclnt_finish_block(struct nlm_wait *block) +/* + * Dequeue the block and return its final status + */ +__be32 nlmclnt_dequeue_block(struct nlm_wait *block) { - if (block == NULL) - return; + __be32 status; + spin_lock(&nlm_blocked_lock); list_del(&block->b_list); + status = block->b_status; spin_unlock(&nlm_blocked_lock); - kfree(block); + return status; } /* * Block on a lock */ -int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) +int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout) { long ret; @@ -142,7 +143,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) /* Reset the lock status after a server reboot so we resend */ if (block->b_status == nlm_lck_denied_grace_period) block->b_status = nlm_lck_blocked; - req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 16b4de868cd2..a14c9110719c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -516,9 +516,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) const struct cred *cred = nfs_file_cred(fl->fl_file); struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - struct nlm_wait *block = NULL; + struct nlm_wait block; unsigned char fl_flags = fl->fl_flags; unsigned char fl_type; + __be32 b_status; int status = -ENOLCK; if (nsm_monitor(host) < 0) @@ -531,31 +532,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (status < 0) goto out; - block = nlmclnt_prepare_block(host, fl); + nlmclnt_prepare_block(&block, host, fl); again: /* * Initialise resp->status to a valid non-zero value, * since 0 == nlm_lck_granted */ resp->status = nlm_lck_blocked; - for(;;) { + + /* + * A GRANTED callback can come at any time -- even before the reply + * to the LOCK request arrives, so we queue the wait before + * requesting the lock. + */ + nlmclnt_queue_block(&block); + for (;;) { /* Reboot protection */ fl->fl_u.nfs_fl.state = host->h_state; status = nlmclnt_call(cred, req, NLMPROC_LOCK); if (status < 0) break; /* Did a reclaimer thread notify us of a server reboot? */ - if (resp->status == nlm_lck_denied_grace_period) + if (resp->status == nlm_lck_denied_grace_period) continue; if (resp->status != nlm_lck_blocked) break; /* Wait on an NLM blocking lock */ - status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); + status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - if (resp->status != nlm_lck_blocked) + if (block.b_status != nlm_lck_blocked) break; } + b_status = nlmclnt_dequeue_block(&block); + if (resp->status == nlm_lck_blocked) + resp->status = b_status; /* if we were interrupted while blocking, then cancel the lock request * and exit @@ -564,7 +575,7 @@ again: if (!req->a_args.block) goto out_unlock; if (nlmclnt_cancel(host, req->a_args.block, fl) == 0) - goto out_unblock; + goto out; } if (resp->status == nlm_granted) { @@ -593,8 +604,6 @@ again: status = -ENOLCK; else status = nlm_stat_to_errno(resp->status); -out_unblock: - nlmclnt_finish_block(block); out: nlmclnt_release_call(req); return status; @@ -602,7 +611,6 @@ out_unlock: /* Fatal error: ensure that we remove the lock altogether */ dprintk("lockd: lock attempt ended in fatal error.\n" " Attempting to unlock.\n"); - nlmclnt_finish_block(block); fl_type = fl->fl_type; fl->fl_type = F_UNLCK; down_read(&host->h_rwsem); -- cgit From 244cc19196d2f6691d34e8cd9bc34a55e4f778e5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:16:01 -0500 Subject: lockd: server should unlock lock if client rejects the grant Currently lockd just dequeues the block and ignores it if the client sends a GRANT_RES with a status of nlm_lck_denied. That status is an indicator that the client has rejected the lock, so the right thing to do is to unlock the lock we were trying to grant. Reported-by: Yongcheng Yang Link: https://bugzilla.redhat.com/show_bug.cgi?id=2063818 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/svclock.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/lockd') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 4e30f3c50970..c43ccdf28ed9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -954,19 +954,32 @@ void nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status) { struct nlm_block *block; + struct file_lock *fl; + int error; dprintk("grant_reply: looking for cookie %x, s=%d \n", *(unsigned int *)(cookie->data), status); if (!(block = nlmsvc_find_block(cookie))) return; - if (status == nlm_lck_denied_grace_period) { + switch (status) { + case nlm_lck_denied_grace_period: /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); - } else { + break; + case nlm_lck_denied: + /* Client doesn't want it, just unlock it */ + nlmsvc_unlink_block(block); + fl = &block->b_call->a_args.lock.fl; + fl->fl_type = F_UNLCK; + error = vfs_lock_file(fl->fl_file, F_SETLK, fl, NULL); + if (error) + pr_warn("lockd: unable to unlock lock rejected by client!\n"); + break; + default: /* - * Lock is now held by client, or has been rejected. - * In both cases, the block should be removed. + * Either it was accepted or the status makes no sense + * just unlink it either way. */ nlmsvc_unlink_block(block); } -- cgit From 2f90e18ffec47c265c14e313047189b79784bc0e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Mar 2023 07:16:03 -0500 Subject: lockd: add some client-side tracepoints Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/Makefile | 6 ++- fs/lockd/clntlock.c | 4 ++ fs/lockd/clntproc.c | 14 +++++++ fs/lockd/trace.c | 3 ++ fs/lockd/trace.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 fs/lockd/trace.c create mode 100644 fs/lockd/trace.h (limited to 'fs/lockd') diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 6d5e83ed4476..ac9f9d84510e 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile @@ -3,10 +3,12 @@ # Makefile for the linux lock manager stuff # +ccflags-y += -I$(src) # needed for trace events + obj-$(CONFIG_LOCKD) += lockd.o -lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ - svcshare.o svcproc.o svcsubs.o mon.o xdr.o +lockd-objs-y += clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ + svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o lockd-objs-$(CONFIG_PROC_FS) += procfs.o lockd-objs := $(lockd-objs-y) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index c374ee072db3..e3972aa3045a 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -14,9 +14,12 @@ #include #include #include +#include #include #include +#include "trace.h" + #define NLMDBG_FACILITY NLMDBG_CLIENT /* @@ -186,6 +189,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) res = nlm_granted; } spin_unlock(&nlm_blocked_lock); + trace_nlmclnt_grant(lock, addr, svc_addr_len(addr), res); return res; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a14c9110719c..fba6c7fa7474 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -20,6 +20,8 @@ #include #include +#include "trace.h" + #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) #define NLMCLNT_POLL_TIMEOUT (30*HZ) @@ -451,6 +453,9 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) status = nlm_stat_to_errno(req->a_res.status); } out: + trace_nlmclnt_test(&req->a_args.lock, + (const struct sockaddr *)&req->a_host->h_addr, + req->a_host->h_addrlen, req->a_res.status); nlmclnt_release_call(req); return status; } @@ -605,10 +610,16 @@ again: else status = nlm_stat_to_errno(resp->status); out: + trace_nlmclnt_lock(&req->a_args.lock, + (const struct sockaddr *)&req->a_host->h_addr, + req->a_host->h_addrlen, req->a_res.status); nlmclnt_release_call(req); return status; out_unlock: /* Fatal error: ensure that we remove the lock altogether */ + trace_nlmclnt_lock(&req->a_args.lock, + (const struct sockaddr *)&req->a_host->h_addr, + req->a_host->h_addrlen, req->a_res.status); dprintk("lockd: lock attempt ended in fatal error.\n" " Attempting to unlock.\n"); fl_type = fl->fl_type; @@ -704,6 +715,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) /* What to do now? I'm out of my depth... */ status = -ENOLCK; out: + trace_nlmclnt_unlock(&req->a_args.lock, + (const struct sockaddr *)&req->a_host->h_addr, + req->a_host->h_addrlen, req->a_res.status); nlmclnt_release_call(req); return status; } diff --git a/fs/lockd/trace.c b/fs/lockd/trace.c new file mode 100644 index 000000000000..d9a6ff6e673c --- /dev/null +++ b/fs/lockd/trace.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h new file mode 100644 index 000000000000..7461b13b6e74 --- /dev/null +++ b/fs/lockd/trace.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockd + +#if !defined(_TRACE_LOCKD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKD_H + +#include +#include +#include +#include + +#ifdef CONFIG_LOCKD_V4 +#define NLM_STATUS_LIST \ + nlm_status_code(LCK_GRANTED) \ + nlm_status_code(LCK_DENIED) \ + nlm_status_code(LCK_DENIED_NOLOCKS) \ + nlm_status_code(LCK_BLOCKED) \ + nlm_status_code(LCK_DENIED_GRACE_PERIOD) \ + nlm_status_code(DEADLCK) \ + nlm_status_code(ROFS) \ + nlm_status_code(STALE_FH) \ + nlm_status_code(FBIG) \ + nlm_status_code_end(FAILED) +#else +#define NLM_STATUS_LIST \ + nlm_status_code(LCK_GRANTED) \ + nlm_status_code(LCK_DENIED) \ + nlm_status_code(LCK_DENIED_NOLOCKS) \ + nlm_status_code(LCK_BLOCKED) \ + nlm_status_code_end(LCK_DENIED_GRACE_PERIOD) +#endif + +#undef nlm_status_code +#undef nlm_status_code_end +#define nlm_status_code(x) TRACE_DEFINE_ENUM(NLM_##x); +#define nlm_status_code_end(x) TRACE_DEFINE_ENUM(NLM_##x); + +NLM_STATUS_LIST + +#undef nlm_status_code +#undef nlm_status_code_end +#define nlm_status_code(x) { NLM_##x, #x }, +#define nlm_status_code_end(x) { NLM_##x, #x } + +#define show_nlm_status(x) __print_symbolic(x, NLM_STATUS_LIST) + +DECLARE_EVENT_CLASS(nlmclnt_lock_event, + TP_PROTO( + const struct nlm_lock *lock, + const struct sockaddr *addr, + unsigned int addrlen, + __be32 status + ), + + TP_ARGS(lock, addr, addrlen, status), + + TP_STRUCT__entry( + __field(u32, oh) + __field(u32, svid) + __field(u32, fh) + __field(unsigned long, status) + __field(u64, start) + __field(u64, len) + __sockaddr(addr, addrlen) + ), + + TP_fast_assign( + __entry->oh = ~crc32_le(0xffffffff, lock->oh.data, lock->oh.len); + __entry->svid = lock->svid; + __entry->fh = nfs_fhandle_hash(&lock->fh); + __entry->start = lock->lock_start; + __entry->len = lock->lock_len; + __entry->status = be32_to_cpu(status); + __assign_sockaddr(addr, addr, addrlen); + ), + + TP_printk( + "addr=%pISpc oh=0x%08x svid=0x%08x fh=0x%08x start=%llu len=%llu status=%s", + __get_sockaddr(addr), __entry->oh, __entry->svid, + __entry->fh, __entry->start, __entry->len, + show_nlm_status(__entry->status) + ) +); + +#define DEFINE_NLMCLNT_EVENT(name) \ + DEFINE_EVENT(nlmclnt_lock_event, name, \ + TP_PROTO( \ + const struct nlm_lock *lock, \ + const struct sockaddr *addr, \ + unsigned int addrlen, \ + __be32 status \ + ), \ + TP_ARGS(lock, addr, addrlen, status)) + +DEFINE_NLMCLNT_EVENT(nlmclnt_test); +DEFINE_NLMCLNT_EVENT(nlmclnt_lock); +DEFINE_NLMCLNT_EVENT(nlmclnt_unlock); +DEFINE_NLMCLNT_EVENT(nlmclnt_grant); + +#endif /* _TRACE_LOCKD_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include -- cgit