diff options
| -rw-r--r-- | CREDITS | 5 | ||||
| -rw-r--r-- | Documentation/filesystems/nfs/00-INDEX | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/nfs/fault_injection.txt | 69 | ||||
| -rw-r--r-- | MAINTAINERS | 1 | ||||
| -rw-r--r-- | fs/nfsd/Kconfig | 10 | ||||
| -rw-r--r-- | fs/nfsd/Makefile | 1 | ||||
| -rw-r--r-- | fs/nfsd/export.c | 12 | ||||
| -rw-r--r-- | fs/nfsd/fault_inject.c | 91 | ||||
| -rw-r--r-- | fs/nfsd/fault_inject.h | 28 | ||||
| -rw-r--r-- | fs/nfsd/nfs4idmap.c | 11 | ||||
| -rw-r--r-- | fs/nfsd/nfs4proc.c | 7 | ||||
| -rw-r--r-- | fs/nfsd/nfs4recover.c | 22 | ||||
| -rw-r--r-- | fs/nfsd/nfs4state.c | 328 | ||||
| -rw-r--r-- | fs/nfsd/nfs4xdr.c | 3 | ||||
| -rw-r--r-- | fs/nfsd/nfsctl.c | 10 | ||||
| -rw-r--r-- | fs/nfsd/nfsd.h | 20 | ||||
| -rw-r--r-- | fs/nfsd/state.h | 3 | ||||
| -rw-r--r-- | fs/nfsd/vfs.c | 17 | ||||
| -rw-r--r-- | include/linux/sunrpc/svc_xprt.h | 3 | ||||
| -rw-r--r-- | include/linux/sunrpc/svcsock.h | 2 | ||||
| -rw-r--r-- | net/sunrpc/cache.c | 2 | ||||
| -rw-r--r-- | net/sunrpc/svc.c | 25 | ||||
| -rw-r--r-- | net/sunrpc/svc_xprt.c | 62 | ||||
| -rw-r--r-- | net/sunrpc/svcsock.c | 8 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 | ||||
| -rwxr-xr-x | tools/nfsd/inject_fault.sh | 49 | 
26 files changed, 624 insertions, 169 deletions
@@ -514,6 +514,11 @@ S: Bessemerstraat 21  S: Amsterdam  S: The Netherlands +N: NeilBrown +E: neil@brown.name +P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F  497A 39EC 9EDD 5662 81B9 +D: NFSD Maintainer 2000-2007 +  N: Zach Brown  E: zab@zabbo.net  D: maestro pci sound diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index a57e12411d2a..1716874a651e 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -2,6 +2,8 @@  	- this file (nfs-related documentation).  Exporting  	- explanation of how to make filesystems exportable. +fault_injection.txt +	- information for using fault injection on the server  knfsd-stats.txt  	- statistics which the NFS server makes available to user space.  nfs.txt diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt new file mode 100644 index 000000000000..426d166089a3 --- /dev/null +++ b/Documentation/filesystems/nfs/fault_injection.txt @@ -0,0 +1,69 @@ + +Fault Injection +=============== +Fault injection is a method for forcing errors that may not normally occur, or +may be difficult to reproduce.  Forcing these errors in a controlled environment +can help the developer find and fix bugs before their code is shipped in a +production system.  Injecting an error on the Linux NFS server will allow us to +observe how the client reacts and if it manages to recover its state correctly. + +NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this +feature. + + +Using Fault Injection +===================== +On the client, mount the fault injection server through NFS v4.0+ and do some +work over NFS (open files, take locks, ...). + +On the server, mount the debugfs filesystem to <debug_dir> and ls +<debug_dir>/nfsd.  This will show a list of files that will be used for +injecting faults on the NFS server.  
As root, write a number n to the file +corresponding to the action you want the server to take.  The server will then +process the first n items it finds.  So if you want to forget 5 locks, echo '5' +to <debug_dir>/nfsd/forget_locks.  A value of 0 will tell the server to forget +all corresponding items.  A log message will be created containing the number +of items forgotten (check dmesg). + +Go back to work on the client and check if the client recovered from the error +correctly. + + +Available Faults +================ +forget_clients: +     The NFS server keeps a list of clients that have placed a mount call.  If +     this list is cleared, the server will have no knowledge of who the client +     is, forcing the client to reauthenticate with the server. + +forget_openowners: +     The NFS server keeps a list of what files are currently opened and who +     they were opened by.  Clearing this list will force the client to reopen +     its files. + +forget_locks: +     The NFS server keeps a list of what files are currently locked in the VFS. +     Clearing this list will force the client to reclaim its locks (files are +     unlocked through the VFS as they are cleared from this list). + +forget_delegations: +     A delegation is used to assure the client that a file, or part of a file, +     has not changed since the delegation was awarded.  Clearing this list will +     force the client to reacquire its delegation before accessing the file +     again. + +recall_delegations: +     Delegations can be recalled by the server when another client attempts to +     access a file.  This test will notify the client that its delegation has +     been revoked, forcing the client to reacquire the delegation before using +     the file again. + + +tools/nfsd/inject_fault.sh script +================================= +This script has been created to ease the fault injection process.  
This script +will detect the mounted debugfs directory and write to the files located there +based on the arguments passed by the user.  For example, running +`inject_fault.sh forget_locks 1` as root will instruct the server to forget +one lock.  Running `inject_fault.sh forget_locks` will instruct the server to +forget all locks. diff --git a/MAINTAINERS b/MAINTAINERS index 7559c1ca56ba..4d1ba2022a95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3775,7 +3775,6 @@ S:	Odd Fixes  KERNEL NFSD, SUNRPC, AND LOCKD SERVERS  M:	"J. Bruce Fields" <bfields@fieldses.org> -M:	Neil Brown <neilb@suse.de>  L:	linux-nfs@vger.kernel.org  W:	http://nfs.sourceforge.net/  S:	Supported diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366608f2..8df1ea4a6ff9 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -80,3 +80,13 @@ config NFSD_V4  	  available from http://linux-nfs.org/.  	  If unsure, say N. + +config NFSD_FAULT_INJECTION +	bool "NFS server manual fault injection" +	depends on NFSD_V4 && DEBUG_KERNEL +	help +	  This option enables support for manually injecting faults +	  into the NFS server.  This is intended to be used for +	  testing error recovery on the NFS client. + +	  If unsure, say N. 
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee20193..af32ef06b4fe 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD)	+= nfsd.o  nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \  			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o  nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o  nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e84..cf8a6bd062fa 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)  	struct svc_expkey key;  	struct svc_expkey *ek = NULL; -	if (mesg[mlen-1] != '\n') +	if (mlen < 1 || mesg[mlen-1] != '\n')  		return -EINVAL;  	mesg[mlen-1] = 0; @@ -1226,12 +1226,12 @@ nfsd_export_init(void)  	int rv;  	dprintk("nfsd: initializing export module.\n"); -	rv = cache_register(&svc_export_cache); +	rv = cache_register_net(&svc_export_cache, &init_net);  	if (rv)  		return rv; -	rv = cache_register(&svc_expkey_cache); +	rv = cache_register_net(&svc_expkey_cache, &init_net);  	if (rv) -		cache_unregister(&svc_export_cache); +		cache_unregister_net(&svc_export_cache, &init_net);  	return rv;  } @@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void)  	dprintk("nfsd: shutting down export module.\n"); -	cache_unregister(&svc_expkey_cache); -	cache_unregister(&svc_export_cache); +	cache_unregister_net(&svc_expkey_cache, &init_net); +	cache_unregister_net(&svc_export_cache, &init_net);  	svcauth_unix_purge();  	dprintk("nfsd: export shutdown complete.\n"); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 000000000000..ce7f0758d84c --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Uses debugfs to create fault injection points for client testing + */ + 
+#include <linux/types.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { +	char *file; +	void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { +	{ +		.file   = "forget_clients", +		.func   = nfsd_forget_clients, +	}, +	{ +		.file   = "forget_locks", +		.func   = nfsd_forget_locks, +	}, +	{ +		.file   = "forget_openowners", +		.func   = nfsd_forget_openowners, +	}, +	{ +		.file   = "forget_delegations", +		.func   = nfsd_forget_delegations, +	}, +	{ +		.file   = "recall_delegations", +		.func   = nfsd_recall_delegations, +	}, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ +	struct nfsd_fault_inject_op *op = op_ptr; + +	if (val == 0) +		printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); +	else +		printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + +	op->func(val); +	return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ +	return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ +	debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ +	unsigned int i; +	struct nfsd_fault_inject_op *op; +	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + +	debug_dir = debugfs_create_dir("nfsd", NULL); +	if (!debug_dir) +		goto fail; + +	for (i = 0; i < NUM_INJECT_OPS; i++) { +		op = &inject_ops[i]; +		if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) +			goto fail; +	} +	return 0; + +fail: +	nfsd_fault_inject_cleanup(); +	return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 000000000000..90bd0570956c --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan 
Schumaker <bjschuma@netapp.com> + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a22fdbd..94096273cd6c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@  #include <linux/seq_file.h>  #include <linux/sched.h>  #include <linux/slab.h> +#include <net/net_namespace.h>  #include "idmap.h"  #include "nfsd.h" @@ -466,20 +467,20 @@ nfsd_idmap_init(void)  {  	int rv; -	rv = cache_register(&idtoname_cache); +	rv = cache_register_net(&idtoname_cache, &init_net);  	if (rv)  		return rv; -	rv = cache_register(&nametoid_cache); +	rv = cache_register_net(&nametoid_cache, &init_net);  	if (rv) -		cache_unregister(&idtoname_cache); +		cache_unregister_net(&idtoname_cache, &init_net);  	return rv;  }  void  nfsd_idmap_shutdown(void)  { -	cache_unregister(&idtoname_cache); -	cache_unregister(&nametoid_cache); +	cache_unregister_net(&idtoname_cache, &init_net); +	cache_unregister_net(&nametoid_cache, &init_net);  }  static int diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c5e28ed8bca0..896da74ec563 100644 --- 
a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_  {  	__be32 status; -	/* Only reclaims from previously confirmed clients are valid */ -	if ((status = nfs4_check_open_reclaim(&open->op_clientid))) -		return status; -  	/* We don't know the target directory, and therefore can not  	* set the change info  	*/ @@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  			break;  		case NFS4_OPEN_CLAIM_PREVIOUS:  			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; +			status = nfs4_check_open_reclaim(&open->op_clientid); +			if (status) +				goto out;  		case NFS4_OPEN_CLAIM_FH:  		case NFS4_OPEN_CLAIM_DELEG_CUR_FH:  			status = do_open_fhandle(rqstp, &cstate->current_fh, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 80a0be9ed008..0b3e875d1abd 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -117,8 +117,7 @@ out_no_tfm:  	return status;  } -int -nfsd4_create_clid_dir(struct nfs4_client *clp) +void nfsd4_create_clid_dir(struct nfs4_client *clp)  {  	const struct cred *original_cred;  	char *dname = clp->cl_recdir; @@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)  	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); -	if (!rec_file || clp->cl_firststate) -		return 0; - +	if (clp->cl_firststate) +		return;  	clp->cl_firststate = 1; +	if (!rec_file) +		return;  	status = nfs4_save_creds(&original_cred);  	if (status < 0) -		return status; +		return;  	dir = rec_file->f_path.dentry;  	/* lock the parent */ @@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)  		status = PTR_ERR(dentry);  		goto out_unlock;  	} -	status = -EEXIST;  	if (dentry->d_inode) +		/* +		 * In the 4.1 case, where we're called from +		 * reclaim_complete(), records from the previous reboot +		 * may still be left, so this is OK. 
+		 * +		 * In the 4.0 case, we should never get here; but we may +		 * as well be forgiving and just succeed silently. +		 */  		goto out_put;  	status = mnt_want_write_file(rec_file);  	if (status) @@ -164,7 +171,6 @@ out_unlock:  				" and is writeable", status,  				user_recovery_dirname);  	nfs4_reset_creds(original_cred); -	return status;  }  typedef int (recdir_func)(struct dentry *, struct dentry *); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ca16dc09e04..e8c98f009670 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,12 +49,20 @@  time_t nfsd4_lease = 90;     /* default lease time */  time_t nfsd4_grace = 90;  static time_t boot_time; -static stateid_t zerostateid;             /* bits all 0 */ -static stateid_t onestateid;              /* bits all 1 */ + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { +	.si_generation = ~0, +	.si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { +	/* all fields zero */ +}; +  static u64 current_sessionid = 1; -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))  /* forward declarations */  static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -133,21 +141,21 @@ unsigned int max_delegations;   * Open owner state (share locks)   */ -/* hash tables for open owners */ -#define OPEN_OWNER_HASH_BITS              8 -#define OPEN_OWNER_HASH_SIZE             (1 << OPEN_OWNER_HASH_BITS) -#define OPEN_OWNER_HASH_MASK             (OPEN_OWNER_HASH_SIZE - 1) +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS              8 +#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 
1) -static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)  {  	unsigned int ret;  	ret = opaque_hashval(ownername->data, ownername->len);  	ret += clientid; -	return ret & OPEN_OWNER_HASH_MASK; +	return ret & OWNER_HASH_MASK;  } -static struct list_head	open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; +static struct list_head	ownerstr_hashtbl[OWNER_HASH_SIZE];  /* hash table for nfs4_file */  #define FILE_HASH_BITS                   8 @@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)  	list_del(&lo->lo_owner.so_strhash);  	list_del(&lo->lo_perstateid); +	list_del(&lo->lo_owner_ino_hash);  	while (!list_empty(&lo->lo_owner.so_stateids)) {  		stp = list_first_entry(&lo->lo_owner.so_stateids,  				struct nfs4_ol_stateid, st_perstateowner); @@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)  	clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);  	if (clp == NULL)  		return NULL; -	clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); +	clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);  	if (clp->cl_name.data == NULL) {  		kfree(clp);  		return NULL;  	} -	memcpy(clp->cl_name.data, name.data, name.len);  	clp->cl_name.len = name.len;  	return clp;  } @@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp)  	spin_unlock(&recall_lock);  	while (!list_empty(&reaplist)) {  		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); -		list_del_init(&dp->dl_recall_lru);  		unhash_delegation(dp);  	}  	while (!list_empty(&clp->cl_openowners)) { @@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void)  	nfsd4_free_slab(&deleg_slab);  } -static int +int  nfsd4_init_slabs(void)  {  	openowner_slab = kmem_cache_create("nfsd4_openowners", @@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj  static void hash_openowner(struct nfs4_openowner *oo, 
struct nfs4_client *clp, unsigned int strhashval)  { -	list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); +	list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);  	list_add(&oo->oo_perclient, &clp->cl_openowners);  } @@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)  	struct nfs4_stateowner *so;  	struct nfs4_openowner *oo; -	list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { +	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { +		if (!so->so_is_open_owner) +			continue;  		if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {  			oo = openowner(so);  			renew_client(oo->oo_owner.so_client); @@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,  	if (open->op_file == NULL)  		return nfserr_jukebox; -	strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); +	strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);  	oo = find_openstateowner_str(strhashval, open);  	open->op_openowner = oo;  	if (!oo) { @@ -3123,7 +3132,6 @@ nfs4_laundromat(void)  	spin_unlock(&recall_lock);  	list_for_each_safe(pos, next, &reaplist) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); -		list_del_init(&dp->dl_recall_lru);  		unhash_delegation(dp);  	}  	test_val = nfsd4_lease; @@ -3718,13 +3726,11 @@ out:  } -/*  - * Lock owner state (byte-range locks) - */  #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS              8 -#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)  static inline u64  end_offset(u64 start, u64 len) @@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len)  	return end > start ? 
end - 1: NFS4_MAX_UINT64;  } -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, -		struct xdr_netobj *ownername) +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)  {  	return (file_hashval(inode) + cl_id  			+ opaque_hashval(ownername->data, ownername->len)) -		& LOCK_HASH_MASK; +		& LOCKOWNER_INO_HASH_MASK;  } -static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];  /*   * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that @@ -3809,23 +3813,39 @@ nevermind:  		deny->ld_type = NFS4_WRITE_LT;  } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ +	struct nfs4_ol_stateid *lst; + +	if (!same_owner_str(&lo->lo_owner, owner, clid)) +		return false; +	lst = list_first_entry(&lo->lo_owner.so_stateids, +			       struct nfs4_ol_stateid, st_perstateowner); +	return lst->st_file->fi_inode == inode; +} +  static struct nfs4_lockowner *  find_lockowner_str(struct inode *inode, clientid_t *clid,  		struct xdr_netobj *owner)  { -	unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); -	struct nfs4_stateowner *op; +	unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); +	struct nfs4_lockowner *lo; -	list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { -		if (same_owner_str(op, owner, clid)) -			return lockowner(op); +	list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { +		if (same_lockowner_ino(lo, inode, clid, owner)) +			return lo;  	}  	return NULL;  }  static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)  { -	list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); +	struct inode *inode = open_stp->st_file->fi_inode; +	unsigned int 
inohash = lockowner_ino_hashval(inode, +			clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + +	list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); +	list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);  	list_add(&lo->lo_perstateid, &open_stp->st_lockowners);  } @@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s   * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has    * occurred.    * - * strhashval = lock_ownerstr_hashval  + * strhashval = ownerstr_hashval   */  static struct nfs4_lockowner * @@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)  	__set_bit(access, &lock_stp->st_access_bmap);  } +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ +	struct nfs4_file *fi = ost->st_file; +	struct nfs4_openowner *oo = openowner(ost->st_stateowner); +	struct nfs4_client *cl = oo->oo_owner.so_client; +	struct nfs4_lockowner *lo; +	unsigned int strhashval; + +	lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); +	if (lo) { +		if (!cstate->minorversion) +			return nfserr_bad_seqid; +		/* XXX: a lockowner always has exactly one stateid: */ +		*lst = list_first_entry(&lo->lo_owner.so_stateids, +				struct nfs4_ol_stateid, st_perstateowner); +		return nfs_ok; +	} +	strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, +			&lock->v.new.owner); +	lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); +	if (lo == NULL) +		return nfserr_jukebox; +	*lst = alloc_init_lock_stateid(lo, fi, ost); +	if (*lst == NULL) { +		release_lockowner(lo); +		return nfserr_jukebox; +	} +	*new = true; +	return nfs_ok; +} +  /*   *  LOCK operation    */ @@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	struct file_lock file_lock;  	struct file_lock conflock;  	
__be32 status = 0; -	unsigned int strhashval; +	bool new_state = false;  	int lkflg;  	int err; @@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		 * lock stateid.  		 */  		struct nfs4_ol_stateid *open_stp = NULL; -		 + +		if (nfsd4_has_session(cstate)) +			/* See rfc 5661 18.10.3: given clientid is ignored: */ +			memcpy(&lock->v.new.clientid, +				&cstate->session->se_client->cl_clientid, +				sizeof(clientid_t)); +  		status = nfserr_stale_clientid; -		if (!nfsd4_has_session(cstate) && -		    STALE_CLIENTID(&lock->lk_new_clientid)) +		if (STALE_CLIENTID(&lock->lk_new_clientid))  			goto out;  		/* validate and update open stateid and open seqid */ @@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  			goto out;  		open_sop = openowner(open_stp->st_stateowner);  		status = nfserr_bad_stateid; -		if (!nfsd4_has_session(cstate) && -			!same_clid(&open_sop->oo_owner.so_client->cl_clientid, +		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,  						&lock->v.new.clientid))  			goto out; -		/* create lockowner and lock stateid */ -		fp = open_stp->st_file; -		strhashval = lock_ownerstr_hashval(fp->fi_inode, -				open_sop->oo_owner.so_client->cl_clientid.cl_id, -				&lock->v.new.owner); -		/* XXX: Do we need to check for duplicate stateowners on -		 * the same file, or should they just be allowed (and -		 * create new stateids)? 
*/ -		status = nfserr_jukebox; -		lock_sop = alloc_init_lock_stateowner(strhashval, -				open_sop->oo_owner.so_client, open_stp, lock); -		if (lock_sop == NULL) -			goto out; -		lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); -		if (lock_stp == NULL) +		status = lookup_or_create_lock_state(cstate, open_stp, lock, +							&lock_stp, &new_state); +		if (status)  			goto out;  	} else {  		/* lock (lock owner + lock stateid) already exists */ @@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  				       NFS4_LOCK_STID, &lock_stp);  		if (status)  			goto out; -		lock_sop = lockowner(lock_stp->st_stateowner); -		fp = lock_stp->st_file;  	} -	/* lock_sop and lock_stp have been created or found */ +	lock_sop = lockowner(lock_stp->st_stateowner); +	fp = lock_stp->st_file;  	lkflg = setlkflg(lock->lk_type);  	status = nfs4_check_openmode(lock_stp, lkflg); @@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		break;  	}  out: -	if (status && lock->lk_is_new && lock_sop) +	if (status && new_state)  		release_lockowner(lock_sop);  	if (!cstate->replay_owner)  		nfs4_unlock_state(); @@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,  	struct nfs4_ol_stateid *stp;  	struct xdr_netobj *owner = &rlockowner->rl_owner;  	struct list_head matches; -	int i; +	unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);  	__be32 status;  	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,  	nfs4_lock_state();  	status = nfserr_locks_held; -	/* XXX: we're doing a linear search through all the lockowners. -	 * Yipes!  For now we'll just hope clients aren't really using -	 * release_lockowner much, but eventually we have to fix these -	 * data structures. 
*/  	INIT_LIST_HEAD(&matches); -	for (i = 0; i < LOCK_HASH_SIZE; i++) { -		list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { -			if (!same_owner_str(sop, owner, clid)) -				continue; -			list_for_each_entry(stp, &sop->so_stateids, -					st_perstateowner) { -				lo = lockowner(sop); -				if (check_for_locks(stp->st_file, lo)) -					goto out; -				list_add(&lo->lo_list, &matches); -			} + +	list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { +		if (sop->so_is_open_owner) +			continue; +		if (!same_owner_str(sop, owner, clid)) +			continue; +		list_for_each_entry(stp, &sop->so_stateids, +				st_perstateowner) { +			lo = lockowner(sop); +			if (check_for_locks(stp->st_file, lo)) +				goto out; +			list_add(&lo->lo_list, &matches);  		}  	}  	/* Clients probably won't expect us to return with some (but not all) @@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid)  	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;  } +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ +	struct nfs4_client *clp, *next; +	int count = 0; + +	nfs4_lock_state(); +	list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { +		nfsd4_remove_clid_dir(clp); +		expire_client(clp); +		if (++count == num) +			break; +	} +	nfs4_unlock_state(); + +	printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ +	release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ +	release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, bool is_open_owner, +				void (*release_sop)(struct nfs4_stateowner *)) +{ +	int i, count = 0; +	struct nfs4_stateowner *sop, *next; + +	for (i = 0; i < OWNER_HASH_SIZE; i++) { +		list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { +			if (sop->so_is_open_owner != is_open_owner) +				continue; +			release_sop(sop); +			if (++count == num) 
+				return count; +		} +	} +	return count; +} + +void nfsd_forget_locks(u64 num) +{ +	int count; + +	nfs4_lock_state(); +	count = nfsd_release_n_owners(num, false, release_lockowner_sop); +	nfs4_unlock_state(); + +	printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ +	int count; + +	nfs4_lock_state(); +	count = nfsd_release_n_owners(num, true, release_openowner_sop); +	nfs4_unlock_state(); + +	printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ +	int i, count = 0; +	struct nfs4_file *fp, *fnext; +	struct nfs4_delegation *dp, *dnext; + +	for (i = 0; i < FILE_HASH_SIZE; i++) { +		list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { +			list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { +				deleg_func(dp); +				if (++count == num) +					return count; +			} +		} +	} + +	return count; +} + +void nfsd_forget_delegations(u64 num) +{ +	unsigned int count; + +	nfs4_lock_state(); +	count = nfsd_process_n_delegations(num, unhash_delegation); +	nfs4_unlock_state(); + +	printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ +	unsigned int count; + +	nfs4_lock_state(); +	spin_lock(&recall_lock); +	count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); +	spin_unlock(&recall_lock); +	nfs4_unlock_state(); + +	printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ +  /* initialization to perform at module load time: */ -int +void  nfs4_state_init(void)  { -	int i, status; +	int i; -	status = nfsd4_init_slabs(); -	if (status) -		return status;  	for (i = 0; i < CLIENT_HASH_SIZE; i++) {  		INIT_LIST_HEAD(&conf_id_hashtbl[i]);  		INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -4416,18 +4566,15 @@ nfs4_state_init(void)  	for (i = 0; i < FILE_HASH_SIZE; i++) {  		INIT_LIST_HEAD(&file_hashtbl[i]);  	} -	for (i = 0; i 
< OPEN_OWNER_HASH_SIZE; i++) { -		INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); -	} -	for (i = 0; i < LOCK_HASH_SIZE; i++) { -		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); +	for (i = 0; i < OWNER_HASH_SIZE; i++) { +		INIT_LIST_HEAD(&ownerstr_hashtbl[i]);  	} -	memset(&onestateid, ~0, sizeof(stateid_t)); +	for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) +		INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);  	INIT_LIST_HEAD(&close_lru);  	INIT_LIST_HEAD(&client_lru);  	INIT_LIST_HEAD(&del_recall_lru);  	reclaim_str_hashtbl_size = 0; -	return 0;  }  static void @@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void)  	spin_unlock(&recall_lock);  	list_for_each_safe(pos, next, &reaplist) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); -		list_del_init(&dp->dl_recall_lru);  		unhash_delegation(dp);  	} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792d6b85..0ec5a1b9700e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp,  static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)  {  	if (p == argp->tmp) { -		p = kmalloc(nbytes, GFP_KERNEL); +		p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);  		if (!p)  			return NULL; -		memcpy(p, argp->tmp, nbytes);  	} else {  		BUG_ON(p != argp->tmpp);  		argp->tmpp = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bb4a11d58a5a..748eda93ce59 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -18,6 +18,7 @@  #include "idmap.h"  #include "nfsd.h"  #include "cache.h" +#include "fault_inject.h"  /*   *	We have a single directory with several nodes in it. 
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void)  	int retval;  	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); -	retval = nfs4_state_init(); /* nfs4 locking state */ +	retval = nfsd4_init_slabs();  	if (retval)  		return retval; +	nfs4_state_init(); +	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ +	if (retval) +		goto out_free_slabs;  	nfsd_stat_init();	/* Statistics */  	retval = nfsd_reply_cache_init();  	if (retval) @@ -1161,6 +1166,8 @@ out_free_cache:  	nfsd_reply_cache_shutdown();  out_free_stat:  	nfsd_stat_shutdown(); +	nfsd_fault_inject_cleanup(); +out_free_slabs:  	nfsd4_free_slabs();  	return retval;  } @@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void)  	nfsd_lockd_shutdown();  	nfsd_idmap_shutdown();  	nfsd4_free_slabs(); +	nfsd_fault_inject_cleanup();  	unregister_filesystem(&nfsd_fs_type);  } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a23fdfb..1d1e8589b4ce 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq)   */  #ifdef CONFIG_NFSD_V4  extern unsigned int max_delegations; -int nfs4_state_init(void); +void nfs4_state_init(void); +int nfsd4_init_slabs(void);  void nfsd4_free_slabs(void);  int nfs4_state_start(void);  void nfs4_state_shutdown(void);  void nfs4_reset_lease(time_t leasetime);  int nfs4_reset_recoverydir(char *recdir);  #else -static inline int nfs4_state_init(void) { return 0; } +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; }  static inline void nfsd4_free_slabs(void) { }  static inline int nfs4_state_start(void) { return 0; }  static inline void nfs4_state_shutdown(void) { } @@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)  }  /* These will return ERR_INVAL if specified in GETATTR or READDIR. 
*/ -#define NFSD_WRITEONLY_ATTRS_WORD1							    \ -(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEONLY_ATTRS_WORD1 \ +	(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)  /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ -#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \ -(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         ) -#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \ -(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD0 \ +	(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ +	(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ +	| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)  #define NFSD_WRITEABLE_ATTRS_WORD2 0  #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf38476a1b..ffb5df1db94f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -366,6 +366,7 @@ struct nfs4_openowner {  struct nfs4_lockowner {  	struct nfs4_stateowner	lo_owner; /* must be first element */ +	struct list_head	lo_owner_ino_hash; /* hash by owner,file */  	struct list_head        lo_perstateid; /* for lockowners only */  	struct list_head	lo_list; /* for temporary uses */  }; @@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void);  extern int nfs4_client_to_reclaim(const char *name);  extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);  extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_create_clid_dir(struct nfs4_client *clp);  extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);  extern void release_session_client(struct nfsd4_session *);  extern 
__be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d25a723b68ad..edf6d3ed8777 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac  	return error;  } -#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." -#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME	XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */  int nfsd4_is_junction(struct dentry *dentry)  {  	struct inode *inode = dentry->d_inode; @@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry)  		return 0;  	if (!(inode->i_mode & S_ISVTX))  		return 0; -	if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) +	if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)  		return 0;  	return 1;  } diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 8620f79658d4..dfa900948af7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u  int	svc_reg_xprt_class(struct svc_xprt_class *);  void	svc_unreg_xprt_class(struct svc_xprt_class *); -void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, +void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,  		      struct svc_serv *);  int	svc_create_xprt(struct svc_serv *, const char *, struct net *,  			const int, const unsigned short, int); @@ -118,7 +118,6 @@ void	svc_xprt_received(struct svc_xprt *);  void	svc_xprt_put(struct svc_xprt *xprt);  void	
svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);  void	svc_close_xprt(struct svc_xprt *xprt); -void	svc_delete_xprt(struct svc_xprt *xprt);  int	svc_port_is_privileged(struct sockaddr *sin);  int	svc_print_xprts(char *buf, int maxlen);  struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 85c50b40759d..c84e9741cb2a 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock {  /*   * Function prototypes.   */ -void		svc_close_all(struct list_head *); +void		svc_close_all(struct svc_serv *);  int		svc_recv(struct svc_rqst *, long);  int		svc_send(struct svc_rqst *);  void		svc_drop(struct svc_rqst *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 03b56bc3b659..465df9ae1046 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net)  		sunrpc_destroy_cache_detail(cd);  	return ret;  } +EXPORT_SYMBOL_GPL(cache_register_net);  int cache_register(struct cache_detail *cd)  { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)  	remove_cache_proc_entries(cd, net);  	sunrpc_destroy_cache_detail(cd);  } +EXPORT_SYMBOL_GPL(cache_unregister_net);  void cache_unregister(struct cache_detail *cd)  { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9d01d46b05f3..e4aabc02368b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)  fail_free:  	kfree(m->to_pool); +	m->to_pool = NULL;  fail:  	return -ENOMEM;  } @@ -285,9 +286,10 @@ svc_pool_map_put(void)  	mutex_lock(&svc_pool_map_mutex);  	if (!--m->count) { -		m->mode = SVC_POOL_DEFAULT;  		kfree(m->to_pool); +		m->to_pool = NULL;  		kfree(m->pool_to); +		m->pool_to = NULL;  		m->npools = 0;  	} @@ -527,17 +529,20 @@ 
svc_destroy(struct svc_serv *serv)  		printk("svc_destroy: no threads for serv=%p!\n", serv);  	del_timer_sync(&serv->sv_temptimer); - -	svc_close_all(&serv->sv_tempsocks); +	/* +	 * The set of xprts (contained in the sv_tempsocks and +	 * sv_permsocks lists) is now constant, since it is modified +	 * only by accepting new sockets (done by service threads in +	 * svc_recv) or aging old ones (done by sv_temptimer), or +	 * configuration changes (excluded by whatever locking the +	 * caller is using--nfsd_mutex in the case of nfsd).  So it's +	 * safe to traverse those lists and shut everything down: +	 */ +	svc_close_all(serv);  	if (serv->sv_shutdown)  		serv->sv_shutdown(serv); -	svc_close_all(&serv->sv_permsocks); - -	BUG_ON(!list_empty(&serv->sv_permsocks)); -	BUG_ON(!list_empty(&serv->sv_tempsocks)); -  	cache_clean_deferred(serv);  	if (svc_serv_is_pooled(serv)) @@ -683,8 +688,8 @@ found_pool:   * Create or destroy enough new threads to make the number   * of threads the given number.  If `pool' is non-NULL, applies   * only to threads in that pool, otherwise round-robins between - * all pools.  Must be called with a svc_get() reference and - * the BKL or another lock to protect access to svc_serv fields. + * all pools.  Caller must ensure that mutual exclusion between this and + * server startup or shutdown.   *   * Destroying threads relies on the service threads filling in   * rqstp->rq_task, which only the nfs ones do.  
Assumes the serv diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 38649cfa4e81..74cb0d8e9ca1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);  static int svc_deferred_recv(struct svc_rqst *rqstp);  static struct cache_deferred_req *svc_defer(struct cache_req *req);  static void svc_age_temp_xprts(unsigned long closure); +static void svc_delete_xprt(struct svc_xprt *xprt);  /* apparently the "standard" is that clients close   * idle connections after 5 minutes, servers after @@ -147,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);   * Called by transport drivers to initialize the transport independent   * portion of the transport instance.   */ -void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, -		   struct svc_serv *serv) +void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, +		   struct svc_xprt *xprt, struct svc_serv *serv)  {  	memset(xprt, 0, sizeof(*xprt));  	xprt->xpt_class = xcl; @@ -163,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,  	spin_lock_init(&xprt->xpt_lock);  	set_bit(XPT_BUSY, &xprt->xpt_flags);  	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); -	xprt->xpt_net = get_net(&init_net); +	xprt->xpt_net = get_net(net);  }  EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)  /*   * Remove a dead transport   */ -void svc_delete_xprt(struct svc_xprt *xprt) +static void svc_delete_xprt(struct svc_xprt *xprt)  {  	struct svc_serv	*serv = xprt->xpt_server;  	struct svc_deferred_req *dr; @@ -893,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)  	spin_lock_bh(&serv->sv_lock);  	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))  		list_del_init(&xprt->xpt_list); -	/* -	 * The only time we're called while xpt_ready is still on a list -	 * is while the list itself is about to be destroyed (in -	 * 
svc_destroy).  BUT svc_xprt_enqueue could still be attempting -	 * to add new entries to the sp_sockets list, so we can't leave -	 * a freed xprt on it. -	 */ -	list_del_init(&xprt->xpt_ready); +	BUG_ON(!list_empty(&xprt->xpt_ready));  	if (test_bit(XPT_TEMP, &xprt->xpt_flags))  		serv->sv_tmpcnt--;  	spin_unlock_bh(&serv->sv_lock); @@ -928,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt)  }  EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) +{ +	struct svc_xprt *xprt; + +	list_for_each_entry(xprt, xprt_list, xpt_list) { +		set_bit(XPT_CLOSE, &xprt->xpt_flags); +		set_bit(XPT_BUSY, &xprt->xpt_flags); +	} +} + +void svc_close_all(struct svc_serv *serv)  { +	struct svc_pool *pool;  	struct svc_xprt *xprt;  	struct svc_xprt *tmp; +	int i; + +	svc_close_list(&serv->sv_tempsocks); +	svc_close_list(&serv->sv_permsocks); +	for (i = 0; i < serv->sv_nrpools; i++) { +		pool = &serv->sv_pools[i]; + +		spin_lock_bh(&pool->sp_lock); +		while (!list_empty(&pool->sp_sockets)) { +			xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); +			list_del_init(&xprt->xpt_ready); +		} +		spin_unlock_bh(&pool->sp_lock); +	}  	/* -	 * The server is shutting down, and no more threads are running. -	 * svc_xprt_enqueue() might still be running, but at worst it -	 * will re-add the xprt to sp_sockets, which will soon get -	 * freed.  So we don't bother with any more locking, and don't -	 * leave the close to the (nonexistent) server threads: +	 * At this point the sp_sockets lists will stay empty, since +	 * svc_enqueue will not add new entries without taking the +	 * sp_lock and checking XPT_BUSY.  	 
*/ -	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { -		set_bit(XPT_CLOSE, &xprt->xpt_flags); +	list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list)  		svc_delete_xprt(xprt); -	} +	list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) +		svc_delete_xprt(xprt); + +	BUG_ON(!list_empty(&serv->sv_permsocks)); +	BUG_ON(!list_empty(&serv->sv_tempsocks));  }  /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 4653286fcc9e..464570906f80 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)  {  	int err, level, optname, one = 1; -	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, +		      &svsk->sk_xprt, serv);  	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);  	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;  	svsk->sk_sk->sk_write_space = svc_write_space; @@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)  {  	struct sock	*sk = svsk->sk_sk; -	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, +		      &svsk->sk_xprt, serv);  	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);  	if (sk->sk_state == TCP_LISTEN) {  		dprintk("setting up TCP socket for listening\n"); @@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,  		return ERR_PTR(-ENOMEM);  	xprt = &svsk->sk_xprt; -	svc_xprt_init(&svc_tcp_bc_class, xprt, serv); +	svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);  	serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ba1296d88de0..894cb42db91d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,  	if 
(!cma_xprt)  		return NULL; -	svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); +	svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);  	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);  	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);  	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh new file mode 100755 index 000000000000..06a399ac8b2f --- /dev/null +++ b/tools/nfsd/inject_fault.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> +# +# Script for easier NFSD fault injection + +# Check that debugfs has been mounted +DEBUGFS=`cat /proc/mounts | grep debugfs` +if [ "$DEBUGFS" == "" ]; then +	echo "debugfs does not appear to be mounted!" +	echo "Please mount debugfs and try again" +	exit 1 +fi + +# Check that the fault injection directory exists +DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd +if [ ! -d "$DEBUGDIR" ]; then +	echo "$DEBUGDIR does not exist" +	echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" +	exit 1 +fi + +function help() +{ +	echo "Usage $0 injection_type [count]" +	echo "" +	echo "Injection types are:" +	ls $DEBUGDIR +	exit 1 +} + +if [ $# == 0 ]; then +	help +elif [ ! -f $DEBUGDIR/$1 ]; then +	help +elif [ $# != 2 ]; then +	COUNT=0 +else +	COUNT=$2 +fi + +BEFORE=`mktemp` +AFTER=`mktemp` +dmesg > $BEFORE +echo $COUNT > $DEBUGDIR/$1 +dmesg > $AFTER +# Capture lines that only exist in the $AFTER file +diff $BEFORE $AFTER | grep ">" +rm -f $BEFORE $AFTER  | 
