summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2021-11-29 15:51:25 +1100
committerChuck Lever <chuck.lever@oracle.com>2021-12-13 13:42:53 -0500
commit3409e4f1e8f239f0ed81be0b068ecf4e73e2e826 (patch)
tree17363ca6a0fbb8458c8f68ab6a409cc94bbbe5ec
parent9d3792aefdcda71d20c2b1ecc589c17ae71eb523 (diff)
NFSD: Make it possible to use svc_set_num_threads_sync
nfsd cannot currently use svc_set_num_threads_sync. It instead uses svc_set_num_threads which does *not* wait for threads to all exit, and has a separate mechanism (nfsd_shutdown_complete) to wait for completion. The reason that nfsd is unlike other services is that nfsd threads can exit separately from svc_set_num_threads being called - they die on receipt of SIGKILL. Also, when the last thread exits, the service must be shut down (sockets closed). For this, the nfsd_mutex needs to be taken, and as that mutex needs to be held while svc_set_num_threads is called, the one cannot wait for the other. This patch changes the nfsd thread so that it can drop the ref on the service without blocking on nfsd_mutex, so that svc_set_num_threads_sync can be used: - if it can drop a non-last reference, it does that. This does not trigger shutdown and does not require a mutex. This will likely happen for all but the last thread signalled, and for all threads being shut down by nfsd_shutdown_threads() - if it can get the mutex without blocking (trylock), it does that and then drops the reference. This will likely happen for the last thread killed by SIGKILL - Otherwise there might be an unrelated task holding the mutex, possibly in another network namespace, or nfsd_shutdown_threads() might be just about to get a reference on the service, after which we can drop ours safely. We cannot conveniently get wakeup notifications on these events, and we are unlikely to need to, so we sleep briefly and check again. With this we can discard nfsd_shutdown_complete and nfsd_complete_shutdown(), and switch to svc_set_num_threads_sync. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
-rw-r--r--fs/nfsd/netns.h3
-rw-r--r--fs/nfsd/nfssvc.c41
-rw-r--r--include/linux/sunrpc/svc.h13
3 files changed, 33 insertions, 24 deletions
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 08bcd8f23b01..1fd59eb0730b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -134,9 +134,6 @@ struct nfsd_net {
wait_queue_head_t ntf_wq;
atomic_t ntf_refcnt;
- /* Allow umount to wait for nfsd state cleanup */
- struct completion nfsd_shutdown_complete;
-
/*
* clientid and stateid data for construction of net unique COPY
* stateids.
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 097abd8b059c..d0d9107a1b93 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -593,20 +593,10 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads,
+ .svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
-static void nfsd_complete_shutdown(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
-
- nn->nfsd_serv = NULL;
- complete(&nn->nfsd_shutdown_complete);
-}
-
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -624,8 +614,6 @@ void nfsd_shutdown_threads(struct net *net)
serv->sv_ops->svo_setup(serv, NULL, 0);
nfsd_put(net);
mutex_unlock(&nfsd_mutex);
- /* Wait for shutdown of nfsd_serv to complete */
- wait_for_completion(&nn->nfsd_shutdown_complete);
}
bool i_am_nfsd(void)
@@ -650,7 +638,6 @@ int nfsd_create_serv(struct net *net)
&nfsd_thread_sv_ops);
if (nn->nfsd_serv == NULL)
return -ENOMEM;
- init_completion(&nn->nfsd_shutdown_complete);
nn->nfsd_serv->sv_maxconn = nn->max_connections;
error = svc_bind(nn->nfsd_serv, net);
@@ -659,7 +646,7 @@ int nfsd_create_serv(struct net *net)
* been set up yet.
*/
svc_put(nn->nfsd_serv);
- nfsd_complete_shutdown(net);
+ nn->nfsd_serv = NULL;
return error;
}
@@ -715,7 +702,7 @@ void nfsd_put(struct net *net)
if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
svc_shutdown_net(nn->nfsd_serv, net);
svc_destroy(&nn->nfsd_serv->sv_refcnt);
- nfsd_complete_shutdown(net);
+ nn->nfsd_serv = NULL;
}
}
@@ -743,7 +730,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (tot > NFSD_MAXSERVS) {
/* total too large: scale down requested numbers */
for (i = 0; i < n && tot > 0; i++) {
- int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
tot -= (nthreads[i] - new);
nthreads[i] = new;
}
@@ -989,10 +976,22 @@ out:
/* Release the thread */
svc_exit_thread(rqstp);
- /* Now if needed we call svc_destroy in appropriate context */
- mutex_lock(&nfsd_mutex);
- nfsd_put(net);
- mutex_unlock(&nfsd_mutex);
+ /* We need to drop a ref, but may not drop the last reference
+ * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
+ * could deadlock with nfsd_shutdown_threads() waiting for us.
+ * So three options are:
+ * - drop a non-final reference,
+ * - get the mutex without waiting
+ * - sleep briefly andd try the above again
+ */
+ while (!svc_put_not_last(nn->nfsd_serv)) {
+ if (mutex_trylock(&nfsd_mutex)) {
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ break;
+ }
+ msleep(20);
+ }
/* Release module */
module_put_and_exit(0);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3903b4ae8ac5..36bfc0281988 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -141,6 +141,19 @@ static inline void svc_put(struct svc_serv *serv)
kref_put(&serv->sv_refcnt, svc_destroy);
}
+/**
+ * svc_put_not_last - decrement non-final reference count on SUNRPC serv
+ * @serv: the svc_serv to have count decremented
+ *
+ * Returns: %true is refcount was decremented.
+ *
+ * If the refcount is 1, it is not decremented and instead failure is reported.
+ */
+static inline bool svc_put_not_last(struct svc_serv *serv)
+{
+ return refcount_dec_not_one(&serv->sv_refcnt.refcount);
+}
+
/*
* Maximum payload size supported by a kernel RPC server.
* This is use to determine the max number of pages nfsd is