41 files changed, 875 insertions, 683 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index a537368ba0db..fd9f28b8a933 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -332,11 +332,18 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
  */
 int afs_end_cursor(struct afs_addr_cursor *ac)
 {
-	if (ac->responded && ac->index != ac->start)
-		WRITE_ONCE(ac->alist->index, ac->index);
+	struct afs_addr_list *alist;
+
+	alist = ac->alist;
+	if (alist) {
+		if (ac->responded && ac->index != ac->start)
+			WRITE_ONCE(alist->index, ac->index);
+		afs_put_addrlist(alist);
+	}
 
-	afs_put_addrlist(ac->alist);
+	ac->addr = NULL;
 	ac->alist = NULL;
+	ac->begun = false;
 	return ac->error;
 }
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 23c7f395d718..ba2b458b36d1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -17,10 +17,13 @@
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
+#include <linux/dns_resolver.h>
 #include "internal.h"
 
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags);
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
@@ -64,6 +67,17 @@ const struct inode_operations afs_dir_inode_operations = {
 	.listxattr	= afs_listxattr,
 };
 
+const struct file_operations afs_dynroot_file_operations = {
+	.open		= dcache_dir_open,
+	.release	= dcache_dir_close,
+	.iterate_shared	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
+};
+
+const struct inode_operations afs_dynroot_inode_operations = {
+	.lookup		= afs_dynroot_lookup,
+};
+
 const struct dentry_operations afs_fs_dentry_operations = {
 	.d_revalidate	= afs_d_revalidate,
 	.d_delete	= afs_d_delete,
@@ -468,25 +482,58 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 /*
+ * Probe to see if a cell may exist.  This prevents positive dentries from
+ * being created unnecessarily.
+ */
+static int afs_probe_cell_name(struct dentry *dentry)
+{
+	struct afs_cell *cell;
+	const char *name = dentry->d_name.name;
+	size_t len = dentry->d_name.len;
+	int ret;
+
+	/* Names prefixed with a dot are R/W mounts. */
+	if (name[0] == '.') {
+		if (len == 1)
+			return -EINVAL;
+		name++;
+		len--;
+	}
+
+	cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
+	if (!IS_ERR(cell)) {
+		afs_put_cell(afs_d2net(dentry), cell);
+		return 0;
+	}
+
+	ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL);
+	if (ret == -ENODATA)
+		ret = -EDESTADDRREQ;
+	return ret;
+}
+
+/*
  * Try to auto mount the mountpoint with pseudo directory, if the autocell
  * operation is setted.
  */
-static struct inode *afs_try_auto_mntpt(
-	int ret, struct dentry *dentry, struct inode *dir, struct key *key,
-	struct afs_fid *fid)
+static struct inode *afs_try_auto_mntpt(struct dentry *dentry,
+					struct inode *dir, struct afs_fid *fid)
 {
-	const char *devname = dentry->d_name.name;
 	struct afs_vnode *vnode = AFS_FS_I(dir);
 	struct inode *inode;
+	int ret = -ENOENT;
 
-	_enter("%d, %p{%pd}, {%x:%u}, %p",
-	       ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key);
+	_enter("%p{%pd}, {%x:%u}",
+	       dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
+
+	if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+		goto out;
 
-	if (ret != -ENOENT ||
-	    !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+	ret = afs_probe_cell_name(dentry);
+	if (ret < 0)
 		goto out;
 
-	inode = afs_iget_autocell(dir, devname, strlen(devname), key);
+	inode = afs_iget_pseudo_dir(dir->i_sb, false);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto out;
@@ -545,13 +592,16 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 	ret = afs_do_lookup(dir, dentry, &fid, key);
 	if (ret < 0) {
-		inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid);
-		if (!IS_ERR(inode)) {
-			key_put(key);
-			goto success;
+		if (ret == -ENOENT) {
+			inode = afs_try_auto_mntpt(dentry, dir, &fid);
+			if (!IS_ERR(inode)) {
+				key_put(key);
+				goto success;
+			}
+
+			ret = PTR_ERR(inode);
 		}
 
-		ret = PTR_ERR(inode);
 		key_put(key);
 		if (ret == -ENOENT) {
 			d_add(dentry, NULL);
@@ -583,12 +633,53 @@ success:
 }
 
 /*
+ * Look up an entry in a dynroot directory.
+ */
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags)
+{
+	struct afs_vnode *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	int ret;
+
+	vnode = AFS_FS_I(dir);
+
+	_enter("%pd", dentry);
+
+	ASSERTCMP(d_inode(dentry), ==, NULL);
+
+	if (dentry->d_name.len >= AFSNAMEMAX) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	inode = afs_try_auto_mntpt(dentry, dir, &fid);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		if (ret == -ENOENT) {
+			d_add(dentry, NULL);
+			_leave(" = NULL [negative]");
+			return NULL;
+		}
+		_leave(" = %d [do]", ret);
+		return ERR_PTR(ret);
+	}
+
+	d_add(dentry, inode);
+	_leave(" = 0 { ino=%lu v=%u }",
+	       d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
+	return NULL;
+}
+
+/*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
  */
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
+	struct afs_super_info *as = dentry->d_sb->s_fs_info;
 	struct afs_vnode *vnode, *dir;
 	struct afs_fid uninitialized_var(fid);
 	struct dentry *parent;
@@ -600,6 +691,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
+	if (as->dyn_root)
+		return 1;
+
 	if (d_really_is_positive(dentry)) {
 		vnode = AFS_FS_I(d_inode(dentry));
 		_enter("{v={%x:%u} n=%pd fl=%lx},",
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c7f17c44c7ce..6b39d0255b72 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -147,7 +147,7 @@ int afs_iget5_test(struct inode *inode, void *opaque)
  *
  * These pseudo inodes don't match anything.
  */
-static int afs_iget5_autocell_test(struct inode *inode, void *opaque)
+static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque)
 {
 	return 0;
 }
@@ -169,31 +169,34 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 }
 
 /*
- * inode retrieval for autocell
+ * Create an inode for a dynamic root directory or an autocell dynamic
+ * automount dir.
  */
-struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
-				int namesz, struct key *key)
+struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 {
 	struct afs_iget_data data;
 	struct afs_super_info *as;
 	struct afs_vnode *vnode;
-	struct super_block *sb;
 	struct inode *inode;
 	static atomic_t afs_autocell_ino;
 
-	_enter("{%x:%u},%*.*s,",
-	       AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode,
-	       namesz, namesz, dev_name ?: "");
+	_enter("");
 
-	sb = dir->i_sb;
 	as = sb->s_fs_info;
-	data.volume = as->volume;
-	data.fid.vid = as->volume->vid;
-	data.fid.unique = 0;
-	data.fid.vnode = 0;
+	if (as->volume) {
+		data.volume = as->volume;
+		data.fid.vid = as->volume->vid;
+	}
+	if (root) {
+		data.fid.vnode = 1;
+		data.fid.unique = 1;
+	} else {
+		data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
+		data.fid.unique = 0;
+	}
 
-	inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino),
-			     afs_iget5_autocell_test, afs_iget5_set,
+	inode = iget5_locked(sb, data.fid.vnode,
+			     afs_iget5_pseudo_dir_test, afs_iget5_set,
 			     &data);
 	if (!inode) {
 		_leave(" = -ENOMEM");
@@ -211,7 +214,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
 
 	inode->i_size		= 0;
 	inode->i_mode		= S_IFDIR | S_IRUGO | S_IXUGO;
-	inode->i_op		= &afs_autocell_inode_operations;
+	if (root) {
+		inode->i_op	= &afs_dynroot_inode_operations;
+		inode->i_fop	= &afs_dynroot_file_operations;
+	} else {
+		inode->i_op	= &afs_autocell_inode_operations;
+	}
 	set_nlink(inode, 2);
 	inode->i_uid		= GLOBAL_ROOT_UID;
 	inode->i_gid		= GLOBAL_ROOT_GID;
@@ -223,8 +231,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
 	inode->i_generation	= 0;
 
 	set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
-	set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-	inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
+	if (!root) {
+		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+		inode->i_flags |= S_AUTOMOUNT;
+	}
+
+	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
 	_leave(" = %p", inode);
 	return inode;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 804d1f905622..f38d6a561a84 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -36,6 +36,7 @@ struct afs_mount_params {
 	bool			rwpath;		/* T if the parent should be considered R/W */
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
+	bool			dyn_root;	/* T if dynamic root */
 	afs_voltype_t		type;		/* type of volume requested */
 	int			volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
@@ -186,6 +187,7 @@ struct afs_super_info {
 	struct afs_net		*net;		/* Network namespace */
 	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
+	bool			dyn_root;	/* True if dynamic root */
 };
 
 static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -634,10 +636,13 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 /*
  * dir.c
  */
-extern bool afs_dir_check_page(struct inode *, struct page *);
+extern const struct file_operations afs_dir_file_operations;
 extern const struct inode_operations afs_dir_inode_operations;
+extern const struct file_operations afs_dynroot_file_operations;
+extern const struct inode_operations afs_dynroot_inode_operations;
 extern const struct dentry_operations afs_fs_dentry_operations;
-extern const struct file_operations afs_dir_file_operations;
+
+extern bool afs_dir_check_page(struct inode *, struct page *);
 
 /*
  * file.c
@@ -695,8 +700,7 @@ extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
  */
 extern int afs_fetch_status(struct afs_vnode *, struct key *);
 extern int afs_iget5_test(struct inode *, void *);
-extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
-				       struct key *);
+extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
 extern struct inode *afs_iget(struct super_block *, struct key *,
 			      struct afs_fid *, struct afs_file_status *,
 			      struct afs_callback *,
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 690fea9d84c3..99fd13500a97 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -72,7 +72,7 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
  */
 static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 {
-	struct afs_super_info *super;
+	struct afs_super_info *as;
 	struct vfsmount *mnt;
 	struct afs_vnode *vnode;
 	struct page *page;
@@ -104,13 +104,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 			goto error_no_page;
 
 		if (mntpt->d_name.name[0] == '.') {
-			devname[0] = '#';
-			memcpy(devname + 1, mntpt->d_name.name, size - 1);
+			devname[0] = '%';
+			memcpy(devname + 1, mntpt->d_name.name + 1, size - 1);
 			memcpy(devname + size, afs_root_cell,
 			       sizeof(afs_root_cell));
 			rwpath = true;
 		} else {
-			devname[0] = '%';
+			devname[0] = '#';
 			memcpy(devname + 1, mntpt->d_name.name, size);
 			memcpy(devname + size + 1, afs_root_cell,
 			       sizeof(afs_root_cell));
@@ -142,11 +142,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	}
 
 	/* work out what options we want */
-	super = AFS_FS_S(mntpt->d_sb);
-	memcpy(options, "cell=", 5);
-	strcpy(options + 5, super->volume->cell->name);
-	if (super->volume->type == AFSVL_RWVOL || rwpath)
-		strcat(options, ",rwpath");
+	as = AFS_FS_S(mntpt->d_sb);
+	if (as->cell) {
+		memcpy(options, "cell=", 5);
+		strcpy(options + 5, as->cell->name);
+		if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath)
+			strcat(options, ",rwpath");
+	}
 
 	/* try and do the mount */
 	_debug("--- attempting mount %s -o %s ---", devname, options);
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index d04511fb3879..ad1328d85526 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -330,26 +330,6 @@ start:
 
 	if (!afs_start_fs_iteration(fc, vnode))
 		goto failed;
-	goto use_server;
-
-next_server:
-	_debug("next");
-	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
-	fc->cbi = NULL;
-	fc->index++;
-	if (fc->index >= fc->server_list->nr_servers)
-		fc->index = 0;
-	if (fc->index != fc->start)
-		goto use_server;
-
-	/* That's all the servers poked to no good effect.  Try again if some
-	 * of them were busy.
-	 */
-	if (fc->flags & AFS_FS_CURSOR_VBUSY)
-		goto restart_from_beginning;
-
-	fc->ac.error = -EDESTADDRREQ;
-	goto failed;
 
 use_server:
 	_debug("use");
@@ -383,6 +363,7 @@ use_server:
 	afs_get_addrlist(alist);
 	read_unlock(&server->fs_lock);
 
+	memset(&fc->ac, 0, sizeof(fc->ac));
 
 	/* Probe the current fileserver if we haven't done so yet. */
 	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
@@ -397,12 +378,8 @@ use_server:
 	else
 		afs_put_addrlist(alist);
 
-	fc->ac.addr  = NULL;
 	fc->ac.start = READ_ONCE(alist->index);
 	fc->ac.index = fc->ac.start;
-	fc->ac.error = 0;
-	fc->ac.begun = false;
-	goto iterate_address;
 
 iterate_address:
 	ASSERT(fc->ac.alist);
@@ -410,16 +387,35 @@ iterate_address:
 	/* Iterate over the current server's address list to try and find an
 	 * address on which it will respond to us.
 	 */
-	if (afs_iterate_addresses(&fc->ac)) {
-		_leave(" = t");
-		return true;
-	}
+	if (!afs_iterate_addresses(&fc->ac))
+		goto next_server;
+
+	_leave(" = t");
+	return true;
 
+next_server:
+	_debug("next");
 	afs_end_cursor(&fc->ac);
-	goto next_server;
+	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+	fc->cbi = NULL;
+	fc->index++;
+	if (fc->index >= fc->server_list->nr_servers)
+		fc->index = 0;
+	if (fc->index != fc->start)
+		goto use_server;
+
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (fc->flags & AFS_FS_CURSOR_VBUSY)
+		goto restart_from_beginning;
+
+	fc->ac.error = -EDESTADDRREQ;
+	goto failed;
 
 failed:
 	fc->flags |= AFS_FS_CURSOR_STOP;
+	afs_end_cursor(&fc->ac);
 	_leave(" = f [failed %d]", fc->ac.error);
 	return false;
 }
@@ -458,12 +454,10 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 			return false;
 		}
 
+		memset(&fc->ac, 0, sizeof(fc->ac));
 		fc->ac.alist = alist;
-		fc->ac.addr  = NULL;
 		fc->ac.start = READ_ONCE(alist->index);
 		fc->ac.index = fc->ac.start;
-		fc->ac.error = 0;
-		fc->ac.begun = false;
 		goto iterate_address;
 
 	case 0:
@@ -520,238 +514,3 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 
 	return fc->ac.error;
 }
-
-#if 0
-/*
- * Set a filesystem server cursor for using a specific FS server.
- */
-int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	afs_init_fs_cursor(fc, vnode);
-
-	read_seqlock_excl(&vnode->cb_lock);
-	if (vnode->cb_interest) {
-		if (vnode->cb_interest->server->fs_state == 0)
-			fc->server = afs_get_server(vnode->cb_interest->server);
-		else
-			fc->ac.error = vnode->cb_interest->server->fs_state;
-	} else {
-		fc->ac.error = -ESTALE;
-	}
-	read_sequnlock_excl(&vnode->cb_lock);
-
-	return fc->ac.error;
-}
-
-/*
- * pick a server to use to try accessing this volume
- * - returns with an elevated usage count on the server chosen
- */
-bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server;
-	int ret, state, loop;
-
-	_enter("%s", volume->vlocation->vldb.name);
-
-	/* stick with the server we're already using if we can */
-	if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
-		fc->server = afs_get_server(vnode->cb_interest->server);
-		goto set_server;
-	}
-
-	down_read(&volume->server_sem);
-
-	/* handle the no-server case */
-	if (volume->nservers == 0) {
-		fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
-		up_read(&volume->server_sem);
-		_leave(" = f [no servers %d]", fc->ac.error);
-		return false;
-	}
-
-	/* basically, just search the list for the first live server and use
-	 * that */
-	ret = 0;
-	for (loop = 0; loop < volume->nservers; loop++) {
-		server = volume->servers[loop];
-		state = server->fs_state;
-
-		_debug("consider %d [%d]", loop, state);
-
-		switch (state) {
-		case 0:
-			goto picked_server;
-
-		case -ENETUNREACH:
-			if (ret == 0)
-				ret = state;
-			break;
-
-		case -EHOSTUNREACH:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH)
-				ret = state;
-			break;
-
-		case -ECONNREFUSED:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH)
-				ret = state;
-			break;
-
-		default:
-		case -EREMOTEIO:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH ||
-			    ret == -ECONNREFUSED)
-				ret = state;
-			break;
-		}
-	}
-
-error:
-	fc->ac.error = ret;
-
-	/* no available servers
-	 * - TODO: handle the no active servers case better
-	 */
-	up_read(&volume->server_sem);
-	_leave(" = f [%d]", fc->ac.error);
-	return false;
-
-picked_server:
-	/* Found an apparently healthy server.  We need to register an interest
-	 * in receiving callbacks before we talk to it.
-	 */
-	ret = afs_register_server_cb_interest(vnode,
-					      &volume->cb_interests[loop], server);
-	if (ret < 0)
-		goto error;
-
-	fc->server = afs_get_server(server);
-	up_read(&volume->server_sem);
-set_server:
-	fc->ac.alist = afs_get_addrlist(fc->server->addrs);
-	fc->ac.addr = &fc->ac.alist->addrs[0];
-	_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
-	_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
-	return true;
-}
-
-/*
- * release a server after use
- * - releases the ref on the server struct that was acquired by picking
- * - records result of using a particular server to access a volume
- * - return true to try again, false if okay or to issue error
- * - the caller must release the server struct if result was false
- */
-bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
-			   struct afs_vnode *vnode)
-{
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server = fc->server;
-	unsigned loop;
-
-	_enter("%s,%pIS,%d",
-	       volume->vlocation->vldb.name, &fc->ac.addr->transport,
-	       fc->ac.error);
-
-	switch (fc->ac.error) {
-		/* success */
-	case 0:
-		server->fs_state = 0;
-		_leave(" = f");
-		return false;
-
-		/* the fileserver denied all knowledge of the volume */
-	case -ENOMEDIUM:
-		down_write(&volume->server_sem);
-
-		/* firstly, find where the server is in the active list (if it
-		 * is) */
-		for (loop = 0; loop < volume->nservers; loop++)
-			if (volume->servers[loop] == server)
-				goto present;
-
-		/* no longer there - may have been discarded by another op */
-		goto try_next_server_upw;
-
-	present:
-		volume->nservers--;
-		memmove(&volume->servers[loop],
-			&volume->servers[loop + 1],
-			sizeof(volume->servers[loop]) *
-			(volume->nservers - loop));
-		volume->servers[volume->nservers] = NULL;
-		afs_put_server(afs_v2net(vnode), server);
-		volume->rjservers++;
-
-		if (volume->nservers > 0)
-			/* another server might acknowledge its existence */
-			goto try_next_server_upw;
-
-		/* handle the case where all the fileservers have rejected the
-		 * volume
-		 * - TODO: try asking the fileservers for volume information
-		 * - TODO: contact the VL server again to see if the volume is
-		 *         no longer registered
-		 */
-		up_write(&volume->server_sem);
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [completely rejected]");
-		return false;
-
-		/* problem reaching the server */
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -ECONNREFUSED:
-	case -ETIME:
-	case -ETIMEDOUT:
-	case -EREMOTEIO:
-		/* mark the server as dead
-		 * TODO: vary dead timeout depending on error
-		 */
-		spin_lock(&server->fs_lock);
-		if (!server->fs_state) {
-			server->fs_state = fc->ac.error;
-			printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
-		}
-		spin_unlock(&server->fs_lock);
-		goto try_next_server;
-
-		/* miscellaneous error */
-	default:
-	case -ENOMEM:
-	case -ENONET:
-		/* tell the caller to accept the result */
-		afs_put_server(afs_v2net(vnode), server);
-		fc->server = NULL;
-		_leave(" = f [local failure]");
-		return false;
-	}
-
-	/* tell the caller to loop around and try the next server */
-try_next_server_upw:
-	up_write(&volume->server_sem);
-try_next_server:
-	afs_put_server(afs_v2net(vnode), server);
-	_leave(" = t [try next server]");
-	return true;
-}
-
-/*
- * Clean up a fileserver cursor.
- */
-int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
-{
-	afs_end_cursor(&fc->ac);
-	afs_put_server(net, fc->server);
-	return fc->ac.error;
-}
-
-#endif
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 0ab3f8457839..0f8dc4c8f07c 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -58,7 +58,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
 		server = afs_lookup_server(cell, key, &vldb->fs_server[i]);
 		if (IS_ERR(server)) {
 			ret = PTR_ERR(server);
-			if (ret == -ENOENT)
+			if (ret == -ENOENT ||
+			    ret == -ENOMEDIUM)
 				continue;
 			goto error_2;
 		}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 1037dd41a622..3623c952b6ff 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -64,6 +64,7 @@ static atomic_t afs_count_active_inodes;
 enum {
 	afs_no_opt,
 	afs_opt_cell,
+	afs_opt_dyn,
 	afs_opt_rwpath,
 	afs_opt_vol,
 	afs_opt_autocell,
@@ -71,6 +72,7 @@ enum {
 
 static const match_table_t afs_options_list = {
 	{ afs_opt_cell,		"cell=%s"	},
+	{ afs_opt_dyn,		"dyn"		},
 	{ afs_opt_rwpath,	"rwpath"	},
 	{ afs_opt_vol,		"vol=%s"	},
 	{ afs_opt_autocell,	"autocell"	},
@@ -148,6 +150,11 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 	const char *suf = "";
 	char pref = '%';
 
+	if (as->dyn_root) {
+		seq_puts(m, "none");
+		return 0;
+	}
+	
 	switch (volume->type) {
 	case AFSVL_RWVOL:
 		break;
@@ -171,8 +178,12 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
  */
 static int afs_show_options(struct seq_file *m, struct dentry *root)
 {
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+
+	if (as->dyn_root)
+		seq_puts(m, ",dyn");
 	if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
-		seq_puts(m, "autocell");
+		seq_puts(m, ",autocell");
 	return 0;
 }
 
@@ -212,7 +223,7 @@ static int afs_parse_options(struct afs_mount_params *params,
 			break;
 
 		case afs_opt_rwpath:
-			params->rwpath = 1;
+			params->rwpath = true;
 			break;
 
 		case afs_opt_vol:
@@ -220,7 +231,11 @@ static int afs_parse_options(struct afs_mount_params *params,
 			break;
 
 		case afs_opt_autocell:
-			params->autocell = 1;
+			params->autocell = true;
+			break;
+
+		case afs_opt_dyn:
+			params->dyn_root = true;
 			break;
 
 		default:
@@ -254,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 	int cellnamesz;
 
 	_enter(",%s", name);
-
+	
 	if (!name) {
 		printk(KERN_ERR "kAFS: no volume name specified\n");
 		return -EINVAL;
@@ -336,7 +351,14 @@ static int afs_test_super(struct super_block *sb, void *data)
 	struct afs_super_info *as1 = data;
 	struct afs_super_info *as = AFS_FS_S(sb);
 
-	return as->net == as1->net && as->volume->vid == as1->volume->vid;
+	return (as->net == as1->net &&
+		as->volume &&
+		as->volume->vid == as1->volume->vid);
+}
+
+static int afs_dynroot_test_super(struct super_block *sb, void *data)
+{
+	return false;
 }
 
 static int afs_set_super(struct super_block *sb, void *data)
@@ -365,24 +387,30 @@ static int afs_fill_super(struct super_block *sb,
 	sb->s_blocksize_bits	= PAGE_SHIFT;
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
-	sb->s_xattr		= afs_xattr_handlers;
+	if (!as->dyn_root)
+		sb->s_xattr	= afs_xattr_handlers;
 	ret = super_setup_bdi(sb);
 	if (ret)
 		return ret;
 	sb->s_bdi->ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
-	sprintf(sb->s_id, "%u", as->volume->vid);
-
-	afs_activate_volume(as->volume);
 
 	/* allocate the root inode and dentry */
-	fid.vid		= as->volume->vid;
-	fid.vnode	= 1;
-	fid.unique	= 1;
-	inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
+	if (as->dyn_root) {
+		inode = afs_iget_pseudo_dir(sb, true);
+		sb->s_flags	|= SB_RDONLY;
+	} else {
+		sprintf(sb->s_id, "%u", as->volume->vid);
+		afs_activate_volume(as->volume);
+		fid.vid		= as->volume->vid;
+		fid.vnode	= 1;
+		fid.unique	= 1;
+		inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
+	}
+
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	if (params->autocell)
+	if (params->autocell || params->dyn_root)
 		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
 
 	ret = -ENOMEM;
@@ -407,7 +435,10 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (as) {
 		as->net = afs_get_net(params->net);
-		as->cell = afs_get_cell(params->cell);
+		if (params->dyn_root)
+			as->dyn_root = true;
+		else
+			as->cell = afs_get_cell(params->cell);
 	}
 	return as;
 }
@@ -451,18 +482,20 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 			goto error;
 	}
 
-	ret = afs_parse_device_name(&params, dev_name);
-	if (ret < 0)
-		goto error;
+	if (!params.dyn_root) {
+		ret = afs_parse_device_name(&params, dev_name);
+		if (ret < 0)
+			goto error;
 
-	/* try and do the mount securely */
-	key = afs_request_key(params.cell);
-	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		ret = PTR_ERR(key);
-		goto error;
+		/* try and do the mount securely */
+		key = afs_request_key(params.cell);
+		if (IS_ERR(key)) {
+			_leave(" = %ld [key]", PTR_ERR(key));
+			ret = PTR_ERR(key);
+			goto error;
+		}
+		params.key = key;
 	}
-	params.key = key;
 
 	/* allocate a superblock info record */
 	ret = -ENOMEM;
@@ -470,20 +503,25 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
 	if (!as)
 		goto error_key;
 
-	/* Assume we're going to need a volume record; at the very least we can
-	 * use it to update the volume record if we have one already.  This
-	 * checks that the volume exists within the cell.
-	 */
-	candidate = afs_create_volume(&params);
-	if (IS_ERR(candidate)) {
-		ret = PTR_ERR(candidate);
-		goto error_as;
-	}
+	if (!params.dyn_root) {
+		/* Assume we're going to need a volume record; at the very
+		 * least we can use it to update the volume record if we have
+		 * one already.  This checks that the volume exists within the
+		 * cell.
+		 */
+		candidate = afs_create_volume(&params);
+		if (IS_ERR(candidate)) {
+			ret = PTR_ERR(candidate);
+			goto error_as;
+		}
 
-	as->volume = candidate;
+		as->volume = candidate;
+	}
 
 	/* allocate a deviceless superblock */
-	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
+	sb = sget(fs_type,
+		  as->dyn_root ? afs_dynroot_test_super : afs_test_super,
+		  afs_set_super, flags, as);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		goto error_as;
@@ -529,9 +567,11 @@ static void afs_kill_super(struct super_block *sb)
 	/* Clear the callback interests (which will do ilookup5) before
 	 * deactivating the superblock.
 	 */
-	afs_clear_callback_interests(as->net, as->volume->servers);
+	if (as->volume)
+		afs_clear_callback_interests(as->net, as->volume->servers);
 	kill_anon_super(sb);
-	afs_deactivate_volume(as->volume);
+	if (as->volume)
+		afs_deactivate_volume(as->volume);
 	afs_destroy_sbi(as);
 }
 
@@ -619,12 +659,24 @@ static void afs_destroy_inode(struct inode *inode)
  */
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
+	struct afs_super_info *as = AFS_FS_S(dentry->d_sb);
 	struct afs_fs_cursor fc;
 	struct afs_volume_status vs;
 	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 	struct key *key;
 	int ret;
 
+	buf->f_type	= dentry->d_sb->s_magic;
+	buf->f_bsize	= AFS_BLOCK_SIZE;
+	buf->f_namelen	= AFSNAMEMAX - 1;
+
+	if (as->dyn_root) {
+		buf->f_blocks	= 1;
+		buf->f_bavail	= 0;
+		buf->f_bfree	= 0;
+		return 0;
+	}
+	
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key))
 		return PTR_ERR(key);
@@ -645,10 +697,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	key_put(key);
 
 	if (ret == 0) {
-		buf->f_type	= dentry->d_sb->s_magic;
-		buf->f_bsize	= AFS_BLOCK_SIZE;
-		buf->f_namelen	= AFSNAMEMAX - 1;
-
 		if (vs.max_quota == 0)
 			buf->f_blocks = vs.part_max_blocks;
 		else
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index e372f89fd36a..5d8562f1ad4a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	struct afs_uvldbentry__xdr *uvldb;
 	struct afs_vldb_entry *entry;
 	bool new_only = false;
-	u32 tmp;
+	u32 tmp, nr_servers;
 	int i, ret;
 
 	_enter("");
@@ -36,6 +36,10 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	uvldb = call->buffer;
 	entry = call->reply[0];
 
+	nr_servers = ntohl(uvldb->nServers);
+	if (nr_servers > AFS_NMAXNSERVERS)
+		nr_servers = AFS_NMAXNSERVERS;
+
 	for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
 		entry->name[i] = (u8)ntohl(uvldb->name[i]);
 	entry->name[i] = 0;
@@ -44,14 +48,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 	/* If there is a new replication site that we can use, ignore all the
 	 * sites that aren't marked as new.
 	 */
-	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+	for (i = 0; i < nr_servers; i++) {
 		tmp = ntohl(uvldb->serverFlags[i]);
 		if (!(tmp & AFS_VLSF_DONTUSE) &&
 		    (tmp & AFS_VLSF_NEWREPSITE))
 			new_only = true;
 	}
 
-	for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+	for (i = 0; i < nr_servers; i++) {
 		struct afs_uuid__xdr *xdr;
 		struct afs_uuid *uuid;
 		int j;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 684c48293353..b517a588781f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -26,9 +26,8 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
 					   unsigned long type_mask)
 {
 	struct afs_server_list *slist;
-	struct afs_server *server;
 	struct afs_volume *volume;
-	int ret = -ENOMEM, nr_servers = 0, i, j;
+	int ret = -ENOMEM, nr_servers = 0, i;
 
 	for (i = 0; i < vldb->nr_servers; i++)
 		if (vldb->fs_mask[i] & type_mask)
@@ -58,50 +57,10 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
 
 	refcount_set(&slist->usage, 1);
 	volume->servers = slist;
-
-	/* Make sure a records exists for each server this volume occupies. */
-	for (i = 0; i < nr_servers; i++) {
-		if (!(vldb->fs_mask[i] & type_mask))
-			continue;
-
-		server = afs_lookup_server(params->cell, params->key,
-					   &vldb->fs_server[i]);
-		if (IS_ERR(server)) {
-			ret = PTR_ERR(server);
-			if (ret == -ENOENT)
-				continue;
-			goto error_2;
-		}
-
-		/* Insertion-sort by server pointer */
-		for (j = 0; j < slist->nr_servers; j++)
-			if (slist->servers[j].server >= server)
-				break;
-		if (j < slist->nr_servers) {
-			if (slist->servers[j].server == server) {
-				afs_put_server(params->net, server);
-				continue;
-			}
-
-			memmove(slist->servers + j + 1,
-				slist->servers + j,
-				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
-		}
-
-		slist->servers[j].server = server;
-		slist->nr_servers++;
-	}
-
-	if (slist->nr_servers == 0) {
-		ret = -EDESTADDRREQ;
-		goto error_2;
-	}
-
 	return volume;
 
-error_2:
-	afs_put_serverlist(params->net, slist);
 error_1:
+	afs_put_cell(params->net, volume->cell);
 	kfree(volume);
 error_0:
 	return ERR_PTR(ret);
@@ -327,7 +286,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
 
 	/* See if the volume's server list got updated. */
 	new = afs_alloc_server_list(volume->cell, key,
-				      vldb, (1 << volume->type));
+				    vldb, (1 << volume->type));
 	if (IS_ERR(new)) {
 		ret = PTR_ERR(new);
 		goto error_vldb;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dbf07051aacd..b4336b42ce3b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -299,7 +299,8 @@ unlock:
  * start an async read(ahead) operation.  return nr_pages we submitted
  * a read for on success, or negative error code.
  */
-static int start_read(struct inode *inode, struct list_head *page_list, int max)
+static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
+		      struct list_head *page_list, int max)
 {
 	struct ceph_osd_client *osdc =
 		&ceph_inode_to_client(inode)->client->osdc;
@@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 	int got = 0;
 	int ret = 0;
 
-	if (!current->journal_info) {
+	if (!rw_ctx) {
 		/* caller of readpages does not hold buffer and read caps
 		 * (fadvise, madvise and readahead cases) */
 		int want = CEPH_CAP_FILE_CACHE;
@@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_file_info *ci = file->private_data;
+	struct ceph_rw_context *rw_ctx;
 	int rc = 0;
 	int max = 0;
 
@@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	if (rc == 0)
 		goto out;
 
+	rw_ctx = ceph_find_rw_context(ci);
 	max = fsc->mount_options->rsize >> PAGE_SHIFT;
-	dout("readpages %p file %p nr_pages %d max %d\n",
-	     inode, file, nr_pages, max);
+	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
+	     inode, file, rw_ctx, nr_pages, max);
 	while (!list_empty(page_list)) {
-		rc = start_read(inode, page_list, max);
+		rc = start_read(inode, rw_ctx, page_list, max);
 		if (rc < 0)
 			goto out;
 	}
@@ -574,7 +578,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	struct ceph_fs_client *fsc;
 	struct ceph_snap_context *snapc, *oldest;
 	loff_t page_off = page_offset(page);
-	long writeback_stat;
 	int err, len = PAGE_SIZE;
 	struct ceph_writeback_ctl ceph_wbc;
 
@@ -615,8 +618,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
 	     inode, page, page->index, page_off, len, snapc, snapc->seq);
 
-	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
-	if (writeback_stat >
+	if (atomic_long_inc_return(&fsc->writeback_count) >
 	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
 		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 
@@ -651,6 +653,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
 	ceph_put_snap_context(snapc);  /* page's reference */
+
+	if (atomic_long_dec_return(&fsc->writeback_count) <
+	    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+
 	return err;
 }
 
@@ -1450,9 +1457,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
 
 	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
 	    ci->i_inline_version == CEPH_INLINE_NONE) {
-		current->journal_info = vma->vm_file;
+		CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
+		ceph_add_rw_context(fi, &rw_ctx);
 		ret = filemap_fault(vmf);
-		current->journal_info = NULL;
+		ceph_del_rw_context(fi, &rw_ctx);
 	} else
 		ret = -EAGAIN;
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a14b2c974c9e..6582c4507e6c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -154,13 +154,19 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+/*
+ * Called under mdsc->mutex.
+ */
+int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 		      struct ceph_cap_reservation *ctx, int need)
 {
-	int i;
+	int i, j;
 	struct ceph_cap *cap;
 	int have;
 	int alloc = 0;
+	int max_caps;
+	bool trimmed = false;
+	struct ceph_mds_session *s;
 	LIST_HEAD(newcaps);
 
 	dout("reserve caps ctx=%p need=%d\n", ctx, need);
@@ -179,16 +185,37 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	spin_unlock(&mdsc->caps_list_lock);
 
 	for (i = have; i < need; i++) {
+retry:
 		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (!cap)
-			break;
+		if (!cap) {
+			if (!trimmed) {
+				for (j = 0; j < mdsc->max_sessions; j++) {
+					s = __ceph_lookup_mds_session(mdsc, j);
+					if (!s)
+						continue;
+					mutex_unlock(&mdsc->mutex);
+
+					mutex_lock(&s->s_mutex);
+					max_caps = s->s_nr_caps - (need - i);
+					ceph_trim_caps(mdsc, s, max_caps);
+					mutex_unlock(&s->s_mutex);
+
+					ceph_put_mds_session(s);
+					mutex_lock(&mdsc->mutex);
+				}
+				trimmed = true;
+				goto retry;
+			} else {
+				pr_warn("reserve caps ctx=%p ENOMEM "
+					"need=%d got=%d\n",
+					ctx, need, have + alloc);
+				goto out_nomem;
+			}
+		}
 		list_add(&cap->caps_item, &newcaps);
 		alloc++;
 	}
-	/* we didn't manage to reserve as much as we needed */
-	if (have + alloc != need)
-		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
-			ctx, need, have + alloc);
+	BUG_ON(have + alloc != need);
 
 	spin_lock(&mdsc->caps_list_lock);
 	mdsc->caps_total_count += alloc;
@@ -204,6 +231,24 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
 	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
 	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
+	return 0;
+
+out_nomem:
+	while (!list_empty(&newcaps)) {
+		cap = list_first_entry(&newcaps,
+				struct ceph_cap, caps_item);
+		list_del(&cap->caps_item);
+		kmem_cache_free(ceph_cap_cachep, cap);
+	}
+
+	spin_lock(&mdsc->caps_list_lock);
+	mdsc->caps_avail_count += have;
+	mdsc->caps_reserve_count -= have;
+	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+					 mdsc->caps_reserve_count +
+					 mdsc->caps_avail_count);
+	spin_unlock(&mdsc->caps_list_lock);
+	return -ENOMEM;
 }
 
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
@@ -498,7 +543,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
 	 */
 	if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
 		if (issued & CEPH_CAP_FILE_SHARED)
-			ci->i_shared_gen++;
+			atomic_inc(&ci->i_shared_gen);
 		if (S_ISDIR(ci->vfs_inode.i_mode)) {
 			dout(" marking %p NOT complete\n", &ci->vfs_inode);
 			__ceph_dir_clear_complete(ci);
@@ -577,18 +622,30 @@ void ceph_add_cap(struct inode *inode,
 		}
 	}
 
-	if (!ci->i_snap_realm) {
+	if (!ci->i_snap_realm ||
+	    ((flags & CEPH_CAP_FLAG_AUTH) &&
+	     realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) {
 		/*
 		 * add this inode to the appropriate snap realm
 		 */
 		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
 							       realmino);
 		if (realm) {
+			struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
+			if (oldrealm) {
+				spin_lock(&oldrealm->inodes_with_caps_lock);
+				list_del_init(&ci->i_snap_realm_item);
+				spin_unlock(&oldrealm->inodes_with_caps_lock);
+			}
+
 			spin_lock(&realm->inodes_with_caps_lock);
 			ci->i_snap_realm = realm;
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
 			spin_unlock(&realm->inodes_with_caps_lock);
+
+			if (oldrealm)
+				ceph_put_snap_realm(mdsc, oldrealm);
 		} else {
 			pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
 			       realmino);
@@ -890,6 +947,11 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 /*
  * called under i_ceph_lock
  */
+static int __ceph_is_single_caps(struct ceph_inode_info *ci)
+{
+	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
+}
+
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
 	return !RB_EMPTY_ROOT(&ci->i_caps);
@@ -1703,21 +1765,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	int mds = -1;   /* keep track of how far we've gone through i_caps list
 			   to avoid an infinite loop on retry */
 	struct rb_node *p;
-	int delayed = 0, sent = 0, num;
-	bool is_delayed = flags & CHECK_CAPS_NODELAY;
+	int delayed = 0, sent = 0;
+	bool no_delay = flags & CHECK_CAPS_NODELAY;
 	bool queue_invalidate = false;
-	bool force_requeue = false;
 	bool tried_invalidate = false;
 
 	/* if we are unmounting, flush any unused caps immediately. */
 	if (mdsc->stopping)
-		is_delayed = true;
+		no_delay = true;
 
 	spin_lock(&ci->i_ceph_lock);
 
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
+	if (!(flags & CHECK_CAPS_AUTHONLY) ||
+	    (ci->i_auth_cap && __ceph_is_single_caps(ci)))
+		__cap_delay_cancel(mdsc, ci);
+
 	goto retry_locked;
 retry:
 	spin_lock(&ci->i_ceph_lock);
@@ -1772,7 +1837,7 @@ retry_locked:
 	 * have cached pages, but don't want them, then try to invalidate.
 	 * If we fail, it's because pages are locked.... try again later.
 	 */
-	if ((!is_delayed || mdsc->stopping) &&
+	if ((!no_delay || mdsc->stopping) &&
 	    !S_ISDIR(inode->i_mode) &&		/* ignore readdir cache */
 	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
 	    inode->i_data.nrpages &&		/* have cached pages */
@@ -1781,27 +1846,16 @@ retry_locked:
 	    !tried_invalidate) {
 		dout("check_caps trying to invalidate on %p\n", inode);
 		if (try_nonblocking_invalidate(inode) < 0) {
-			if (revoking & (CEPH_CAP_FILE_CACHE|
-					CEPH_CAP_FILE_LAZYIO)) {
-				dout("check_caps queuing invalidate\n");
-				queue_invalidate = true;
-				ci->i_rdcache_revoking = ci->i_rdcache_gen;
-			} else {
-				dout("check_caps failed to invalidate pages\n");
-				/* we failed to invalidate pages.  check these
-				   caps again later. */
-				force_requeue = true;
-				__cap_set_timeouts(mdsc, ci);
-			}
+			dout("check_caps queuing invalidate\n");
+			queue_invalidate = true;
+			ci->i_rdcache_revoking = ci->i_rdcache_gen;
 		}
 		tried_invalidate = true;
 		goto retry_locked;
 	}
 
-	num = 0;
 	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
 		cap = rb_entry(p, struct ceph_cap, ci_node);
-		num++;
 
 		/* avoid looping forever */
 		if (mds >= cap->mds ||
@@ -1864,7 +1918,7 @@ retry_locked:
 		    cap->mds_wanted == want)
 			continue;     /* nope, all good */
 
-		if (is_delayed)
+		if (no_delay)
 			goto ack;
 
 		/* delay? */
@@ -1955,15 +2009,8 @@ ack:
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	/*
-	 * Reschedule delayed caps release if we delayed anything,
-	 * otherwise cancel.
-	 */
-	if (delayed && is_delayed)
-		force_requeue = true;   /* __send_cap delayed release; requeue */
-	if (!delayed && !is_delayed)
-		__cap_delay_cancel(mdsc, ci);
-	else if (!is_delayed || force_requeue)
+	/* Reschedule delayed caps release if we delayed anything */
+	if (delayed)
 		__cap_delay_requeue(mdsc, ci);
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2160,7 +2207,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
 	u64 flush_tid;
 	int err = 0;
 	int dirty;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync);
 
 	dout("write_inode %p wait=%d\n", inode, wait);
 	if (wait) {
@@ -3426,7 +3473,14 @@ retry:
 	 */
 
 	issued = cap->issued;
-	WARN_ON(issued != cap->implemented);
+	if (issued != cap->implemented)
+		pr_err_ratelimited("handle_cap_export: issued != implemented: "
+				"ino (%llx.%llx) mds%d seq %d mseq %d "
+				"issued %s implemented %s\n",
+				ceph_vinop(inode), mds, cap->seq, cap->mseq,
+				ceph_cap_string(issued),
+				ceph_cap_string(cap->implemented));
+
 
 	tcap = __get_cap_for_mds(ci, target);
 	if (tcap) {
@@ -3572,12 +3626,13 @@ retry:
 		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
 		    (ocap->seq != le32_to_cpu(ph->seq) ||
 		     ocap->mseq != le32_to_cpu(ph->mseq))) {
-			pr_err("handle_cap_import: mismatched seq/mseq: "
-			       "ino (%llx.%llx) mds%d seq %d mseq %d "
-			       "importer mds%d has peer seq %d mseq %d\n",
-			       ceph_vinop(inode), peer, ocap->seq,
-			       ocap->mseq, mds, le32_to_cpu(ph->seq),
-			       le32_to_cpu(ph->mseq));
+			pr_err_ratelimited("handle_cap_import: "
+					"mismatched seq/mseq: ino (%llx.%llx) "
+					"mds%d seq %d mseq %d importer mds%d "
+					"has peer seq %d mseq %d\n",
+					ceph_vinop(inode), peer, ocap->seq,
+					ocap->mseq, mds, le32_to_cpu(ph->seq),
+					le32_to_cpu(ph->mseq));
 		}
 		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
@@ -3939,11 +3994,20 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 
 	cap = __get_cap_for_mds(ci, mds);
 	if (cap && __cap_is_valid(cap)) {
-		if (force ||
-		    ((cap->issued & drop) &&
-		     (cap->issued & unless) == 0)) {
-			if ((cap->issued & drop) &&
-			    (cap->issued & unless) == 0) {
+		unless &= cap->issued;
+		if (unless) {
+			if (unless & CEPH_CAP_AUTH_EXCL)
+				drop &= ~CEPH_CAP_AUTH_SHARED;
+			if (unless & CEPH_CAP_LINK_EXCL)
+				drop &= ~CEPH_CAP_LINK_SHARED;
+			if (unless & CEPH_CAP_XATTR_EXCL)
+				drop &= ~CEPH_CAP_XATTR_SHARED;
+			if (unless & CEPH_CAP_FILE_EXCL)
+				drop &= ~CEPH_CAP_FILE_SHARED;
+		}
+
+		if (force || (cap->issued & drop)) {
+			if (cap->issued & drop) {
 				int wanted = __ceph_caps_wanted(ci);
 				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
 					wanted |= cap->mds_wanted;
@@ -3975,7 +4039,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 			*p += sizeof(*rel);
 			ret = 1;
 		} else {
-			dout("encode_inode_release %p cap %p %s\n",
+			dout("encode_inode_release %p cap %p %s (noop)\n",
 			     inode, cap, ceph_cap_string(cap->issued));
 		}
 	}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a5266699b67..0c4346806e17 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -173,7 +173,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
  * the MDS if/when the directory is modified).
  */
 static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
-			    u32 shared_gen)
+			    int shared_gen)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct dentry *parent = file->f_path.dentry;
@@ -184,7 +184,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 	u64 idx = 0;
 	int err = 0;
 
-	dout("__dcache_readdir %p v%u at %llx\n", dir, shared_gen, ctx->pos);
+	dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos);
 
 	/* search start position */
 	if (ctx->pos > 2) {
@@ -231,11 +231,17 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			goto out;
 		}
 
-		di = ceph_dentry(dentry);
 		spin_lock(&dentry->d_lock);
-		if (di->lease_shared_gen == shared_gen &&
-		    d_really_is_positive(dentry) &&
-		    fpos_cmp(ctx->pos, di->offset) <= 0) {
+		di = ceph_dentry(dentry);
+		if (d_unhashed(dentry) ||
+		    d_really_is_negative(dentry) ||
+		    di->lease_shared_gen != shared_gen) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+			err = -EAGAIN;
+			goto out;
+		}
+		if (fpos_cmp(ctx->pos, di->offset) <= 0) {
 			emit_dentry = true;
 		}
 		spin_unlock(&dentry->d_lock);
@@ -333,7 +339,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    __ceph_dir_is_complete_ordered(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-		u32 shared_gen = ci->i_shared_gen;
+		int shared_gen = atomic_read(&ci->i_shared_gen);
 		spin_unlock(&ci->i_ceph_lock);
 		err = __dcache_readdir(file, ctx, shared_gen);
 		if (err != -EAGAIN)
@@ -381,6 +387,7 @@ more:
 		if (op == CEPH_MDS_OP_READDIR) {
 			req->r_direct_hash = ceph_frag_value(frag);
 			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
+			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
 		}
 		if (fi->last_name) {
 			req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
@@ -750,7 +757,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 			spin_unlock(&ci->i_ceph_lock);
 			dout(" dir %p complete, -ENOENT\n", dir);
 			d_add(dentry, NULL);
-			di->lease_shared_gen = ci->i_shared_gen;
+			di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
 			return NULL;
 		}
 		spin_unlock(&ci->i_ceph_lock);
@@ -835,7 +842,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_args.mknod.mode = cpu_to_le32(mode);
 	req->r_args.mknod.rdev = cpu_to_le32(rdev);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	if (acls.pagelist) {
 		req->r_pagelist = acls.pagelist;
@@ -887,7 +894,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (!err && !req->r_reply_info.head->is_dentry)
@@ -936,7 +943,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	req->r_parent = dir;
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	req->r_args.mkdir.mode = cpu_to_le32(mode);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+	req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	if (acls.pagelist) {
 		req->r_pagelist = acls.pagelist;
@@ -983,7 +990,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	/* release LINK_SHARED on source inode (mds will lock it) */
-	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err) {
 		d_drop(dentry);
@@ -1096,7 +1103,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	/* release LINK_RDCACHE on source inode (mds will lock it) */
-	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
 	if (d_really_is_positive(new_dentry))
 		req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
 	err = ceph_mdsc_do_request(mdsc, old_dir, req);
@@ -1106,16 +1113,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * do_request, above).  If there is no trace, we need
 		 * to do it here.
 		 */
-
-		/* d_move screws up sibling dentries' offsets */
-		ceph_dir_clear_complete(old_dir);
-		ceph_dir_clear_complete(new_dir);
-
 		d_move(old_dentry, new_dentry);
-
-		/* ensure target dentry is invalidated, despite
-		   rehashing bug in vfs_rename_dir */
-		ceph_invalidate_dentry_lease(new_dentry);
 	}
 	ceph_mdsc_put_request(req);
 	return err;
@@ -1199,12 +1197,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 	int valid = 0;
 
 	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_shared_gen == di->lease_shared_gen)
+	if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen)
 		valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
 	spin_unlock(&ci->i_ceph_lock);
 	dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
-	     dir, (unsigned)ci->i_shared_gen, dentry,
-	     (unsigned)di->lease_shared_gen, valid);
+	     dir, (unsigned)atomic_read(&ci->i_shared_gen),
+	     dentry, (unsigned)di->lease_shared_gen, valid);
 	return valid;
 }
 
@@ -1332,24 +1330,37 @@ static void ceph_d_release(struct dentry *dentry)
  */
 static void ceph_d_prune(struct dentry *dentry)
 {
-	dout("ceph_d_prune %p\n", dentry);
+	struct ceph_inode_info *dir_ci;
+	struct ceph_dentry_info *di;
+
+	dout("ceph_d_prune %pd %p\n", dentry, dentry);
 
 	/* do we have a valid parent? */
 	if (IS_ROOT(dentry))
 		return;
 
-	/* if we are not hashed, we don't affect dir's completeness */
-	if (d_unhashed(dentry))
+	/* we hold d_lock, so d_parent is stable */
+	dir_ci = ceph_inode(d_inode(dentry->d_parent));
+	if (dir_ci->i_vino.snap == CEPH_SNAPDIR)
 		return;
 
-	if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
+	/* who calls d_delete() should also disable dcache readdir */
+	if (d_really_is_negative(dentry))
 		return;
 
-	/*
-	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
-	 * cleared until d_release
-	 */
-	ceph_dir_clear_complete(d_inode(dentry->d_parent));
+	/* d_fsdata does not get cleared until d_release */
+	if (!d_unhashed(dentry)) {
+		__ceph_dir_clear_complete(dir_ci);
+		return;
+	}
+
+	/* Disable dcache readdir just in case that someone called d_drop()
+	 * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED
+	 * properly (dcache readdir is still enabled) */
+	di = ceph_dentry(dentry);
+	if (di->offset > 0 &&
+	    di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen))
+		__ceph_dir_clear_ordered(dir_ci);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5c17125f45c7..6639926eed4e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 			return -ENOMEM;
 		}
 		cf->fmode = fmode;
+
+		spin_lock_init(&cf->rw_contexts_lock);
+		INIT_LIST_HEAD(&cf->rw_contexts);
+
 		cf->next_offset = 2;
 		cf->readdir_cache_idx = -1;
 		file->private_data = cf;
@@ -396,7 +400,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
 	if (flags & O_CREAT) {
-		req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+		req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
 		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 		if (acls.pagelist) {
 			req->r_pagelist = acls.pagelist;
@@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file)
 		ceph_mdsc_put_request(cf->last_readdir);
 	kfree(cf->last_name);
 	kfree(cf->dir_info);
+	WARN_ON(!list_empty(&cf->rw_contexts));
 	kmem_cache_free(ceph_file_cachep, cf);
 
 	/* wake up anyone waiting for caps on this inode */
@@ -1199,12 +1204,13 @@ again:
 			retry_op = READ_INLINE;
 		}
 	} else {
+		CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
-		current->journal_info = filp;
+		ceph_add_rw_context(fi, &rw_ctx);
 		ret = generic_file_read_iter(iocb, to);
-		current->journal_info = NULL;
+		ceph_del_rw_context(fi, &rw_ctx);
 	}
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ab81652198c4..c6ec5aa46100 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -494,7 +494,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_wrbuffer_ref = 0;
 	ci->i_wrbuffer_ref_head = 0;
 	atomic_set(&ci->i_filelock_ref, 0);
-	ci->i_shared_gen = 0;
+	atomic_set(&ci->i_shared_gen, 0);
 	ci->i_rdcache_gen = 0;
 	ci->i_rdcache_revoking = 0;
 
@@ -1041,7 +1041,7 @@ static void update_dentry_lease(struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		goto out_unlock;
 
-	di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
+	di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen);
 
 	if (duration == 0)
 		goto out_unlock;
@@ -1080,6 +1080,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
 
 	BUG_ON(d_inode(dn));
 
+	if (S_ISDIR(in->i_mode)) {
+		/* If inode is directory, d_splice_alias() below will remove
+		 * 'realdn' from its origin parent. We need to ensure that
+		 * origin parent's readdir cache will not reference 'realdn'
+		 */
+		realdn = d_find_any_alias(in);
+		if (realdn) {
+			struct ceph_dentry_info *di = ceph_dentry(realdn);
+			spin_lock(&realdn->d_lock);
+
+			realdn->d_op->d_prune(realdn);
+
+			di->time = jiffies;
+			di->lease_shared_gen = 0;
+			di->offset = 0;
+
+			spin_unlock(&realdn->d_lock);
+			dput(realdn);
+		}
+	}
+
 	/* dn must be unhashed */
 	if (!d_unhashed(dn))
 		d_drop(dn);
@@ -1295,8 +1316,8 @@ retry_lookup:
 		if (!rinfo->head->is_target) {
 			dout("fill_trace null dentry\n");
 			if (d_really_is_positive(dn)) {
-				ceph_dir_clear_ordered(dir);
 				dout("d_delete %p\n", dn);
+				ceph_dir_clear_ordered(dir);
 				d_delete(dn);
 			} else if (have_lease) {
 				if (d_unhashed(dn))
@@ -1323,7 +1344,6 @@ retry_lookup:
 			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
 			     dn, d_inode(dn), ceph_vinop(d_inode(dn)),
 			     ceph_vinop(in));
-			ceph_dir_clear_ordered(dir);
 			d_invalidate(dn);
 			have_lease = false;
 		}
@@ -1573,9 +1593,19 @@ retry_lookup:
 		} else if (d_really_is_positive(dn) &&
 			   (ceph_ino(d_inode(dn)) != tvino.ino ||
 			    ceph_snap(d_inode(dn)) != tvino.snap)) {
+			struct ceph_dentry_info *di = ceph_dentry(dn);
 			dout(" dn %p points to wrong inode %p\n",
 			     dn, d_inode(dn));
-			__ceph_dir_clear_ordered(ci);
+
+			spin_lock(&dn->d_lock);
+			if (di->offset > 0 &&
+			    di->lease_shared_gen ==
+			    atomic_read(&ci->i_shared_gen)) {
+				__ceph_dir_clear_ordered(ci);
+				di->offset = 0;
+			}
+			spin_unlock(&dn->d_lock);
+
 			d_delete(dn);
 			dput(dn);
 			goto retry_lookup;
@@ -1600,9 +1630,7 @@ retry_lookup:
 				 &req->r_caps_reservation);
 		if (ret < 0) {
 			pr_err("fill_inode badness on %p\n", in);
-			if (d_really_is_positive(dn))
-				__ceph_dir_clear_ordered(ci);
-			else
+			if (d_really_is_negative(dn))
 				iput(in);
 			d_drop(dn);
 			err = ret;
@@ -2000,8 +2028,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			ceph_encode_timespec(&req->r_args.setattr.atime,
 					     &attr->ia_atime);
 			mask |= CEPH_SETATTR_ATIME;
-			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 	if (ia_valid & ATTR_MTIME) {
@@ -2022,8 +2050,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			ceph_encode_timespec(&req->r_args.setattr.mtime,
 					     &attr->ia_mtime);
 			mask |= CEPH_SETATTR_MTIME;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 	if (ia_valid & ATTR_SIZE) {
@@ -2041,8 +2069,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 			req->r_args.setattr.old_size =
 				cpu_to_le64(inode->i_size);
 			mask |= CEPH_SETATTR_SIZE;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+				   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
 		}
 	}
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1b468250e947..2e8f90f96540 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -604,10 +604,20 @@ static void __register_request(struct ceph_mds_client *mdsc,
 			       struct ceph_mds_request *req,
 			       struct inode *dir)
 {
+	int ret = 0;
+
 	req->r_tid = ++mdsc->last_tid;
-	if (req->r_num_caps)
-		ceph_reserve_caps(mdsc, &req->r_caps_reservation,
-				  req->r_num_caps);
+	if (req->r_num_caps) {
+		ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+					req->r_num_caps);
+		if (ret < 0) {
+			pr_err("__register_request %p "
+			       "failed to reserve caps: %d\n", req, ret);
+			/* set req->r_err to fail early from __do_request */
+			req->r_err = ret;
+			return;
+		}
+	}
 	dout("__register_request %p tid %lld\n", req, req->r_tid);
 	ceph_mdsc_get_request(req);
 	insert_request(&mdsc->request_tree, req);
@@ -1545,9 +1555,9 @@ out:
 /*
  * Trim session cap count down to some max number.
  */
-static int trim_caps(struct ceph_mds_client *mdsc,
-		     struct ceph_mds_session *session,
-		     int max_caps)
+int ceph_trim_caps(struct ceph_mds_client *mdsc,
+		   struct ceph_mds_session *session,
+		   int max_caps)
 {
 	int trim_caps = session->s_nr_caps - max_caps;
 
@@ -2438,11 +2448,14 @@ out:
  */
 void ceph_invalidate_dir_request(struct ceph_mds_request *req)
 {
-	struct inode *inode = req->r_parent;
+	struct inode *dir = req->r_parent;
+	struct inode *old_dir = req->r_old_dentry_dir;
 
-	dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
+	dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir);
 
-	ceph_dir_clear_complete(inode);
+	ceph_dir_clear_complete(dir);
+	if (old_dir)
+		ceph_dir_clear_complete(old_dir);
 	if (req->r_dentry)
 		ceph_invalidate_dentry_lease(req->r_dentry);
 	if (req->r_old_dentry)
@@ -2773,7 +2786,7 @@ static void handle_session(struct ceph_mds_session *session,
 		break;
 
 	case CEPH_SESSION_RECALL_STATE:
-		trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
+		ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
 		break;
 
 	case CEPH_SESSION_FLUSHMSG:
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 837ac4b087a0..71e3b783ee6f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -444,4 +444,7 @@ ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
 extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
 					  struct ceph_mds_session *session);
 
+extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
+			  struct ceph_mds_session *session,
+			  int max_caps);
 #endif
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8a2ca41e4b97..07cf95e6413d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -922,13 +922,17 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			/*
 			 * Move the inode to the new realm
 			 */
-			spin_lock(&realm->inodes_with_caps_lock);
+			oldrealm = ci->i_snap_realm;
+			spin_lock(&oldrealm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			spin_unlock(&oldrealm->inodes_with_caps_lock);
+
+			spin_lock(&realm->inodes_with_caps_lock);
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
-			oldrealm = ci->i_snap_realm;
 			ci->i_snap_realm = realm;
 			spin_unlock(&realm->inodes_with_caps_lock);
+
 			spin_unlock(&ci->i_ceph_lock);
 
 			ceph_get_snap_realm(mdsc, realm);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2beeec07fa76..21b2e5b004eb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -256,7 +256,8 @@ struct ceph_inode_xattr {
  */
 struct ceph_dentry_info {
 	struct ceph_mds_session *lease_session;
-	u32 lease_gen, lease_shared_gen;
+	int lease_shared_gen;
+	u32 lease_gen;
 	u32 lease_seq;
 	unsigned long lease_renew_after, lease_renew_from;
 	struct list_head lru;
@@ -353,7 +354,7 @@ struct ceph_inode_info {
 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
 	atomic_t i_filelock_ref;
-	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
+	atomic_t i_shared_gen;       /* increment each time we get FILE_SHARED */
 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
@@ -648,7 +649,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
 extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
-extern void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 			     struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			       struct ceph_cap_reservation *ctx);
@@ -668,6 +669,9 @@ struct ceph_file_info {
 	short fmode;     /* initialized on open */
 	short flags;     /* CEPH_F_* */
 
+	spinlock_t rw_contexts_lock;
+	struct list_head rw_contexts;
+
 	/* readdir: position within the dir */
 	u32 frag;
 	struct ceph_mds_request *last_readdir;
@@ -684,6 +688,49 @@ struct ceph_file_info {
 	int dir_info_len;
 };
 
+struct ceph_rw_context {
+	struct list_head list;
+	struct task_struct *thread;
+	int caps;
+};
+
+#define CEPH_DEFINE_RW_CONTEXT(_name, _caps)	\
+	struct ceph_rw_context _name = {	\
+		.thread = current,		\
+		.caps = _caps,			\
+	}
+
+static inline void ceph_add_rw_context(struct ceph_file_info *cf,
+				       struct ceph_rw_context *ctx)
+{
+	spin_lock(&cf->rw_contexts_lock);
+	list_add(&ctx->list, &cf->rw_contexts);
+	spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline void ceph_del_rw_context(struct ceph_file_info *cf,
+				       struct ceph_rw_context *ctx)
+{
+	spin_lock(&cf->rw_contexts_lock);
+	list_del(&ctx->list);
+	spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline struct ceph_rw_context*
+ceph_find_rw_context(struct ceph_file_info *cf)
+{
+	struct ceph_rw_context *ctx, *found = NULL;
+	spin_lock(&cf->rw_contexts_lock);
+	list_for_each_entry(ctx, &cf->rw_contexts, list) {
+		if (ctx->thread == current) {
+			found = ctx;
+			break;
+		}
+	}
+	spin_unlock(&cf->rw_contexts_lock);
+	return found;
+}
+
 struct ceph_readdir_cache_control {
 	struct page  *page;
 	struct dentry **dentries;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index c7a863219fa3..e35e711db68e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -128,6 +128,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
 		seq_puts(m, " type: CDROM ");
 	else
 		seq_printf(m, " type: %d ", dev_type);
+	if (tcon->seal)
+		seq_printf(m, " Encrypted");
+	if (tcon->unix_ext)
+		seq_printf(m, " POSIX Extensions");
 	if (tcon->ses->server->ops->dump_share_caps)
 		tcon->ses->server->ops->dump_share_caps(m, tcon);
 
@@ -246,7 +250,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 			atomic_read(&server->smbd_conn->mr_used_count));
 skip_rdma:
 #endif
-		seq_printf(m, "\nNumber of credits: %d", server->credits);
+		seq_printf(m, "\nNumber of credits: %d Dialect 0x%x",
+			server->credits,  server->dialect);
+		if (server->sign)
+			seq_printf(m, " signed");
 		i++;
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4e0922d24eb2..9ceebf30eb22 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -6343,9 +6343,7 @@ SetEARetry:
 	pSMB->InformationLevel =
 		cpu_to_le16(SMB_SET_FILE_EA);
 
-	parm_data =
-		(struct fealist *) (((char *) &pSMB->hdr.Protocol) +
-				       offset);
+	parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset;
 	pSMB->ParameterOffset = cpu_to_le16(param_offset);
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->SetupCount = 1;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 6eb9f9691ed4..2a2b34ccaf49 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -192,6 +192,35 @@ struct smb2_symlink_err_rsp {
 	__u8  PathBuffer[0];
 } __packed;
 
+/* SMB 3.1.1 and later dialects. See MS-SMB2 section 2.2.2.1 */
+struct smb2_error_context_rsp {
+	__le32 ErrorDataLength;
+	__le32 ErrorId;
+	__u8  ErrorContextData; /* ErrorDataLength long array */
+} __packed;
+
+/* Defines for Type field below (see MS-SMB2 2.2.2.2.2.1) */
+#define MOVE_DST_IPADDR_V4	cpu_to_le32(0x00000001)
+#define MOVE_DST_IPADDR_V6	cpu_to_le32(0x00000002)
+
+struct move_dst_ipaddr {
+	__le32 Type;
+	__u32  Reserved;
+	__u8   address[16]; /* IPv4 followed by 12 bytes rsvd or IPv6 address */
+} __packed;
+
+struct share_redirect_error_context_rsp {
+	__le32 StructureSize;
+	__le32 NotificationType;
+	__le32 ResourceNameOffset;
+	__le32 ResourceNameLength;
+	__le16 Flags;
+	__le16 TargetType;
+	__le32 IPAddrCount;
+	struct move_dst_ipaddr IpAddrMoveList[0];
+	/* __u8 ResourceName[] */ /* Name of share as counted Unicode string */
+} __packed;
+
 #define SMB2_CLIENT_GUID_SIZE 16
 
 struct smb2_negotiate_req {
@@ -320,7 +349,9 @@ struct smb2_logoff_rsp {
 } __packed;
 
 /* Flags/Reserved for SMB3.1.1 */
-#define SMB2_SHAREFLAG_CLUSTER_RECONNECT	0x0001
+#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001)
+#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002)
+#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004)
 
 struct smb2_tree_connect_req {
 	struct smb2_sync_hdr sync_hdr;
@@ -331,6 +362,82 @@ struct smb2_tree_connect_req {
 	__u8   Buffer[1];	/* variable length */
 } __packed;
 
+/* See MS-SMB2 section 2.2.9.2 */
+/* Context Types */
+#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000
+#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001)
+
+struct tree_connect_contexts {
+	__le16 ContextType;
+	__le16 DataLength;
+	__le32 Reserved;
+	__u8   Data[0];
+} __packed;
+
+/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */
+struct smb3_blob_data {
+	__le16 BlobSize;
+	__u8   BlobData[0];
+} __packed;
+
+/* Valid values for Attr */
+#define SE_GROUP_MANDATORY		0x00000001
+#define SE_GROUP_ENABLED_BY_DEFAULT	0x00000002
+#define SE_GROUP_ENABLED		0x00000004
+#define SE_GROUP_OWNER			0x00000008
+#define SE_GROUP_USE_FOR_DENY_ONLY	0x00000010
+#define SE_GROUP_INTEGRITY		0x00000020
+#define SE_GROUP_INTEGRITY_ENABLED	0x00000040
+#define SE_GROUP_RESOURCE		0x20000000
+#define SE_GROUP_LOGON_ID		0xC0000000
+
+/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */
+
+struct sid_array_data {
+	__le16 SidAttrCount;
+	/* SidAttrList - array of sid_attr_data structs */
+} __packed;
+
+struct luid_attr_data {
+
+} __packed;
+
+/*
+ * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5
+ * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA
+ */
+
+struct privilege_array_data {
+	__le16 PrivilegeCount;
+	/* array of privilege_data structs */
+} __packed;
+
+struct remoted_identity_tcon_context {
+	__le16 TicketType; /* must be 0x0001 */
+	__le16 TicketSize; /* total size of this struct */
+	__le16 User; /* offset to SID_ATTR_DATA struct with user info */
+	__le16 UserName; /* offset to null terminated Unicode username string */
+	__le16 Domain; /* offset to null terminated Unicode domain name */
+	__le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */
+	__le16 RestrictedGroups; /* similar to above */
+	__le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */
+	__le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */
+	__le16 Owner; /* offset to BLOB_DATA struct */
+	__le16 DefaultDacl; /* offset to BLOB_DATA struct */
+	__le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */
+	__le16 UserClaims; /* offset to BLOB_DATA struct */
+	__le16 DeviceClaims; /* offset to BLOB_DATA struct */
+	__u8   TicketInfo[0]; /* variable length buf - remoted identity data */
+} __packed;
+
+struct smb2_tree_connect_req_extension {
+	__le32 TreeConnectContextOffset;
+	__le16 TreeConnectContextCount;
+	__u8  Reserved[10];
+	__u8  PathName[0]; /* variable sized array */
+	/* followed by array of TreeConnectContexts */
+} __packed;
+
 struct smb2_tree_connect_rsp {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 16 */
@@ -365,7 +472,8 @@ struct smb2_tree_connect_rsp {
 #define SHI1005_FLAGS_ENABLE_HASH_V1			0x00002000
 #define SHI1005_FLAGS_ENABLE_HASH_V2			0x00004000
 #define SHI1005_FLAGS_ENCRYPT_DATA			0x00008000
-#define SHI1005_FLAGS_ALL				0x0000FF33
+#define SMB2_SHAREFLAG_IDENTITY_REMOTING		0x00040000 /* 3.1.1 */
+#define SHI1005_FLAGS_ALL				0x0004FF33
 
 /* Possible share capabilities */
 #define SMB2_SHARE_CAP_DFS	cpu_to_le32(0x00000008) /* all dialects */
@@ -373,6 +481,7 @@ struct smb2_tree_connect_rsp {
 #define SMB2_SHARE_CAP_SCALEOUT	cpu_to_le32(0x00000020) /* 3.0 */
 #define SMB2_SHARE_CAP_CLUSTER	cpu_to_le32(0x00000040) /* 3.0 */
 #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */
+#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */
 
 struct smb2_tree_disconnect_req {
 	struct smb2_sync_hdr sync_hdr;
@@ -556,6 +665,7 @@ struct create_context {
 #define SMB2_LEASE_WRITE_CACHING	cpu_to_le32(0x04)
 
 #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET cpu_to_le32(0x00000004)
 
 #define SMB2_LEASE_KEY_SIZE 16
 
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5130492847eb..91710eb571fb 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -217,9 +217,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work)
 			spin_unlock_irqrestore(
 				&info->reassembly_queue_lock, flags);
 			put_receive_buffer(info, response);
-		}
+		} else
+			spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
 	} while (response);
-	spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
+
 	info->reassembly_data_length = 0;
 
 	log_rdma_event(INFO, "free receive buffers\n");
@@ -1934,15 +1935,16 @@ again:
 				 * No need to lock if we are not at the
 				 * end of the queue
 				 */
-				if (!queue_length)
+				if (queue_length)
+					list_del(&response->list);
+				else {
 					spin_lock_irq(
 						&info->reassembly_queue_lock);
-				list_del(&response->list);
-				queue_removed++;
-				if (!queue_length)
+					list_del(&response->list);
 					spin_unlock_irq(
 						&info->reassembly_queue_lock);
-
+				}
+				queue_removed++;
 				info->count_reassembly_queue--;
 				info->count_dequeue_reassembly_queue++;
 				put_receive_buffer(info, response);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index 58e2fe40b2a0..5933f995309a 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -33,8 +33,7 @@ config CRAMFS_BLOCKDEV
 
 config CRAMFS_MTD
 	bool "Support CramFs image directly mapped in physical memory"
-	depends on CRAMFS && MTD
-	depends on CRAMFS=m || MTD=y
+	depends on CRAMFS && CRAMFS <= MTD
 	default y if !CRAMFS_BLOCKDEV
 	help
 	  This option allows the CramFs driver to load data directly from
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index c53d9cc5ae7a..a03ce3422578 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 {
 	struct kernfs_open_file *of = kernfs_of(file);
 	const struct kernfs_ops *ops;
-	size_t len;
+	ssize_t len;
 	char *buf;
 
 	if (of->atomic_write_len) {
diff --git a/fs/locks.c b/fs/locks.c
index 21b4dfa289ee..d6ff4beb70ce 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1554,9 +1554,9 @@ out:
 EXPORT_SYMBOL(__break_lease);
 
 /**
- *	lease_get_mtime - get the last modified time of an inode
+ *	lease_get_mtime - update modified time of an inode with exclusive lease
  *	@inode: the inode
- *      @time:  pointer to a timespec which will contain the last modified time
+ *      @time:  pointer to a timespec which contains the last modified time
  *
  * This is to force NFS clients to flush their caches for files with
  * exclusive leases.  The justification is that if someone has an
@@ -1580,8 +1580,6 @@ void lease_get_mtime(struct inode *inode, struct timespec *time)
 
 	if (has_lease)
 		*time = current_time(inode);
-	else
-		*time = inode->i_mtime;
 }
 
 EXPORT_SYMBOL(lease_get_mtime);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2758480555fa..1a70581e1cb2 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -251,6 +251,34 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 }
 
 /*
+ * Fill in the pre_op attr for the wcc data
+ */
+void fill_pre_wcc(struct svc_fh *fhp)
+{
+	struct inode    *inode;
+	struct kstat	stat;
+	__be32 err;
+
+	if (fhp->fh_pre_saved)
+		return;
+
+	inode = d_inode(fhp->fh_dentry);
+	err = fh_getattr(fhp, &stat);
+	if (err) {
+		/* Grab the times from inode anyway */
+		stat.mtime = inode->i_mtime;
+		stat.ctime = inode->i_ctime;
+		stat.size  = inode->i_size;
+	}
+
+	fhp->fh_pre_mtime = stat.mtime;
+	fhp->fh_pre_ctime = stat.ctime;
+	fhp->fh_pre_size  = stat.size;
+	fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+	fhp->fh_pre_saved = true;
+}
+
+/*
  * Fill in the post_op attr for the wcc data
  */
 void fill_post_wcc(struct svc_fh *fhp)
@@ -261,7 +289,8 @@ void fill_post_wcc(struct svc_fh *fhp)
 		printk("nfsd: inode locked twice during operation.\n");
 
 	err = fh_getattr(fhp, &fhp->fh_post_attr);
-	fhp->fh_post_change = nfsd4_change_attribute(d_inode(fhp->fh_dentry));
+	fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr,
+						     d_inode(fhp->fh_dentry));
 	if (err) {
 		fhp->fh_post_saved = false;
 		/* Grab the ctime anyway - set_change_info might use it */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 008ea0b627d0..a0bed2b2004d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1363,14 +1363,14 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
 	const struct nfsd4_layout_ops *ops;
 	struct nfs4_layout_stateid *ls;
 	__be32 nfserr;
-	int accmode;
+	int accmode = NFSD_MAY_READ_IF_EXEC;
 
 	switch (lgp->lg_seg.iomode) {
 	case IOMODE_READ:
-		accmode = NFSD_MAY_READ;
+		accmode |= NFSD_MAY_READ;
 		break;
 	case IOMODE_RW:
-		accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
+		accmode |= NFSD_MAY_READ | NFSD_MAY_WRITE;
 		break;
 	default:
 		dprintk("%s: invalid iomode %d\n",
@@ -1703,6 +1703,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	status = nfserr_minor_vers_mismatch;
 	if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
 		goto out;
+	status = nfserr_resource;
+	if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+		goto out;
 
 	status = nfs41_check_op_ordering(args);
 	if (status) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b29b5a185a2c..150521c9671b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3590,6 +3590,7 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
 	switch (s->sc_type) {
 	default:
 		break;
+	case 0:
 	case NFS4_CLOSED_STID:
 	case NFS4_CLOSED_DELEG_STID:
 		ret = nfserr_bad_stateid;
@@ -5182,7 +5183,6 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
 			    lockowner(stp->st_stateowner)))
 		goto out;
 
-	stp->st_stid.sc_type = NFS4_CLOSED_STID;
 	release_lock_stateid(stp);
 	ret = nfs_ok;
 
@@ -6078,10 +6078,8 @@ out:
 		 * If this is a new, never-before-used stateid, and we are
 		 * returning an error, then just go ahead and release it.
 		 */
-		if (status && new) {
-			lock_stp->st_stid.sc_type = NFS4_CLOSED_STID;
+		if (status && new)
 			release_lock_stateid(lock_stp);
-		}
 
 		mutex_unlock(&lock_stp->st_mutex);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2c61c6b8ae09..e502fd16246b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -455,8 +455,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 	}
 
 	label->len = 0;
-#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+	if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) &&
+	    bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
 		READ_BUF(4);
 		len += 4;
 		dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
@@ -476,7 +476,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		if (!label->data)
 			return nfserr_jukebox;
 	}
-#endif
 	if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
 		if (!umask)
 			goto xdr_error;
@@ -1918,8 +1917,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 
 	if (argp->taglen > NFSD4_MAX_TAGLEN)
 		goto xdr_error;
-	if (argp->opcnt > 100)
-		goto xdr_error;
+	/*
+	 * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
+	 * here, so we return success at the xdr level so that
+	 * nfsd4_proc can handle this is an NFS-level error.
+	 */
+	if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+		return 0;
 
 	if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
 		argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
@@ -1991,7 +1995,7 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
 		*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
 		*p++ = 0;
 	} else if (IS_I_VERSION(inode)) {
-		p = xdr_encode_hyper(p, nfsd4_change_attribute(inode));
+		p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode));
 	} else {
 		*p++ = cpu_to_be32(stat->ctime.tv_sec);
 		*p++ = cpu_to_be32(stat->ctime.tv_nsec);
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index b8444189223b..755e256a9103 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -253,36 +253,20 @@ fh_clear_wcc(struct svc_fh *fhp)
  * By using both ctime and the i_version counter we guarantee that as
  * long as time doesn't go backwards we never reuse an old value.
  */
-static inline u64 nfsd4_change_attribute(struct inode *inode)
+static inline u64 nfsd4_change_attribute(struct kstat *stat,
+					 struct inode *inode)
 {
 	u64 chattr;
 
-	chattr =  inode->i_ctime.tv_sec;
+	chattr =  stat->ctime.tv_sec;
 	chattr <<= 30;
-	chattr += inode->i_ctime.tv_nsec;
+	chattr += stat->ctime.tv_nsec;
 	chattr += inode_query_iversion(inode);
 	return chattr;
 }
 
-/*
- * Fill in the pre_op attr for the wcc data
- */
-static inline void
-fill_pre_wcc(struct svc_fh *fhp)
-{
-	struct inode    *inode;
-
-	inode = d_inode(fhp->fh_dentry);
-	if (!fhp->fh_pre_saved) {
-		fhp->fh_pre_mtime = inode->i_mtime;
-		fhp->fh_pre_ctime = inode->i_ctime;
-		fhp->fh_pre_size  = inode->i_size;
-		fhp->fh_pre_change = nfsd4_change_attribute(inode);
-		fhp->fh_pre_saved = true;
-	}
-}
-
-extern void fill_post_wcc(struct svc_fh *);
+extern void fill_pre_wcc(struct svc_fh *fhp);
+extern void fill_post_wcc(struct svc_fh *fhp);
 #else
 #define fh_clear_wcc(ignored)
 #define fill_pre_wcc(ignored)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 644a0342f0e0..79b6064f8977 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -188,6 +188,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	*p++ = htonl((u32) stat->ino);
 	*p++ = htonl((u32) stat->atime.tv_sec);
 	*p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
+	time = stat->mtime;
 	lease_get_mtime(d_inode(dentry), &time); 
 	*p++ = htonl((u32) time.tv_sec);
 	*p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index ae782df5c063..fe484cf93e5c 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -33,7 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
 	new_op->upcall.req.lookup.parent_refn = parent->refn;
 	strncpy(new_op->upcall.req.lookup.d_name,
 		dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	gossip_debug(GOSSIP_DCACHE_DEBUG,
 		     "%s:%s:%d interrupt flag [%d]\n",
@@ -118,8 +118,12 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 
 	/* We do not need to continue with negative dentries. */
-	if (!dentry->d_inode)
-		goto out;
+	if (!dentry->d_inode) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG,
+		    "%s: negative dentry or positive dentry and inode valid.\n",
+		    __func__);
+		return 1;
+	}
 
 	/* Now we must perform a getattr to validate the inode contents. */
 
@@ -129,14 +133,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		    __FILE__, __func__, __LINE__);
 		return 0;
 	}
-	if (ret == 0)
-		return 0;
-
-out:
-	gossip_debug(GOSSIP_DCACHE_DEBUG,
-	    "%s: negative dentry or positive dentry and inode valid.\n",
-	    __func__);
-	return 1;
+	return !ret;
 }
 
 const struct dentry_operations orangefs_dentry_operations = {
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index c98bba2dbc94..6e3134e6d98a 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -41,7 +41,7 @@ static int orangefs_create(struct inode *dir,
 			       ORANGEFS_TYPE_METAFILE, mode);
 
 	strncpy(new_op->upcall.req.create.d_name,
-		dentry->d_name.name, ORANGEFS_NAME_MAX);
+		dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -142,7 +142,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
 	new_op->upcall.req.lookup.parent_refn = parent->refn;
 
 	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	gossip_debug(GOSSIP_NAME_DEBUG,
 		     "%s: doing lookup on %s under %pU,%d\n",
@@ -244,7 +244,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
 
 	new_op->upcall.req.remove.parent_refn = parent->refn;
 	strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, "orangefs_unlink",
 				get_interruptible_flag(inode));
@@ -300,8 +300,8 @@ static int orangefs_symlink(struct inode *dir,
 
 	strncpy(new_op->upcall.req.sym.entry_name,
 		dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
-	strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
+	strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -372,7 +372,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 			      ORANGEFS_TYPE_DIRECTORY, mode);
 
 	strncpy(new_op->upcall.req.mkdir.d_name,
-		dentry->d_name.name, ORANGEFS_NAME_MAX);
+		dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
 
@@ -453,10 +453,10 @@ static int orangefs_rename(struct inode *old_dir,
 
 	strncpy(new_op->upcall.req.rename.d_old_name,
 		old_dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 	strncpy(new_op->upcall.req.rename.d_new_name,
 		new_dentry->d_name.name,
-		ORANGEFS_NAME_MAX);
+		ORANGEFS_NAME_MAX - 1);
 
 	ret = service_operation(new_op,
 				"orangefs_rename",
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 1c59dff530de..6e35f2f3c897 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -328,7 +328,7 @@ static int help_show(struct seq_file *m, void *v)
 /*
  * initialize the client-debug file.
  */
-int orangefs_client_debug_init(void)
+static int orangefs_client_debug_init(void)
 {
 
 	int rc = -ENOMEM;
@@ -1056,7 +1056,7 @@ int orangefs_debugfs_new_debug(void __user *arg)
 			client_debug_string,
 			llu(mask_info.mask_value));
 	} else {
-		gossip_lerr("Invalid mask type....\n");
+		gossip_err("Invalid mask type....\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
index b5fd9cd4960f..51147f9ce3d6 100644
--- a/fs/orangefs/orangefs-debugfs.h
+++ b/fs/orangefs/orangefs-debugfs.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 int orangefs_debugfs_init(int);
 void orangefs_debugfs_cleanup(void);
-int orangefs_client_debug_init(void);
 int orangefs_prepare_debugfs_help_string(int);
 int orangefs_debugfs_new_client_mask(void __user *);
 int orangefs_debugfs_new_client_string(void __user *);
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 2595453fe737..eebbaece85ef 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -56,11 +56,7 @@
 
 #include "orangefs-dev-proto.h"
 
-#ifdef ORANGEFS_KERNEL_DEBUG
-#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       10
-#else
 #define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
-#endif
 
 #define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30
 
@@ -104,11 +100,11 @@ enum orangefs_vfs_op_states {
  * orangefs kernel memory related flags
  */
 
-#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB))
+#if (defined CONFIG_DEBUG_SLAB)
 #define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
 #else
 #define ORANGEFS_CACHE_CREATE_FLAGS 0
-#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
+#endif
 
 extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 extern const struct xattr_handler *orangefs_xattr_handlers[];
@@ -471,8 +467,6 @@ int orangefs_inode_check_changed(struct inode *inode);
 
 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
 
-void orangefs_make_bad_inode(struct inode *inode);
-
 int orangefs_unmount_sb(struct super_block *sb);
 
 bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 97fe93129f38..ea6256d136d1 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -230,25 +230,42 @@ static int orangefs_inode_type(enum orangefs_ds_type objtype)
 		return -1;
 }
 
-static int orangefs_inode_is_stale(struct inode *inode, int new,
+static void orangefs_make_bad_inode(struct inode *inode)
+{
+	if (is_root_handle(inode)) {
+		/*
+		 * if this occurs, the pvfs2-client-core was killed but we
+		 * can't afford to lose the inode operations and such
+		 * associated with the root handle in any case.
+		 */
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** NOT making bad root inode %pU\n",
+			     get_khandle_from_ino(inode));
+	} else {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** making bad inode %pU\n",
+			     get_khandle_from_ino(inode));
+		make_bad_inode(inode);
+	}
+}
+
+static int orangefs_inode_is_stale(struct inode *inode,
     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
 {
 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 	int type = orangefs_inode_type(attrs->objtype);
-	if (!new) {
-		/*
-		 * If the inode type or symlink target have changed then this
-		 * inode is stale.
-		 */
-		if (type == -1 || !(inode->i_mode & type)) {
-			orangefs_make_bad_inode(inode);
-			return 1;
-		}
-		if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
-		    link_target, ORANGEFS_NAME_MAX)) {
-			orangefs_make_bad_inode(inode);
-			return 1;
-		}
+	/*
+	 * If the inode type or symlink target have changed then this
+	 * inode is stale.
+	 */
+	if (type == -1 || !(inode->i_mode & type)) {
+		orangefs_make_bad_inode(inode);
+		return 1;
+	}
+	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
+	    link_target, ORANGEFS_NAME_MAX)) {
+		orangefs_make_bad_inode(inode);
+		return 1;
 	}
 	return 0;
 }
@@ -294,16 +311,18 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
 	if (ret != 0)
 		goto out;
 
-	type = orangefs_inode_type(new_op->
-	    downcall.resp.getattr.attributes.objtype);
-	ret = orangefs_inode_is_stale(inode, new,
-	    &new_op->downcall.resp.getattr.attributes,
-	    new_op->downcall.resp.getattr.link_target);
-	if (ret) {
-		ret = -ESTALE;
-		goto out;
+	if (!new) {
+		ret = orangefs_inode_is_stale(inode,
+		    &new_op->downcall.resp.getattr.attributes,
+		    new_op->downcall.resp.getattr.link_target);
+		if (ret) {
+			ret = -ESTALE;
+			goto out;
+		}
 	}
 
+	type = orangefs_inode_type(new_op->
+	    downcall.resp.getattr.attributes.objtype);
 	switch (type) {
 	case S_IFREG:
 		inode->i_flags = orangefs_inode_flags(&new_op->
@@ -348,6 +367,12 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
 			inode->i_link = orangefs_inode->link_target;
 		}
 		break;
+	/* i.e. -1 */
+	default:
+		/* XXX: ESTALE?  This is what is done if it is not new. */
+		orangefs_make_bad_inode(inode);
+		ret = -ESTALE;
+		goto out;
 	}
 
 	inode->i_uid = make_kuid(&init_user_ns, new_op->
@@ -401,7 +426,7 @@ int orangefs_inode_check_changed(struct inode *inode)
 	if (ret != 0)
 		goto out;
 
-	ret = orangefs_inode_is_stale(inode, 0,
+	ret = orangefs_inode_is_stale(inode,
 	    &new_op->downcall.resp.getattr.attributes,
 	    new_op->downcall.resp.getattr.link_target);
 out:
@@ -444,25 +469,6 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
 	return ret;
 }
 
-void orangefs_make_bad_inode(struct inode *inode)
-{
-	if (is_root_handle(inode)) {
-		/*
-		 * if this occurs, the pvfs2-client-core was killed but we
-		 * can't afford to lose the inode operations and such
-		 * associated with the root handle in any case.
-		 */
-		gossip_debug(GOSSIP_UTILS_DEBUG,
-			     "*** NOT making bad root inode %pU\n",
-			     get_khandle_from_ino(inode));
-	} else {
-		gossip_debug(GOSSIP_UTILS_DEBUG,
-			     "*** making bad inode %pU\n",
-			     get_khandle_from_ino(inode));
-		make_bad_inode(inode);
-	}
-}
-
 /*
  * The following is a very dirty hack that is now a permanent part of the
  * ORANGEFS protocol. See protocol.h for more error definitions.
@@ -537,6 +543,7 @@ int orangefs_normalize_to_errno(__s32 error_code)
 	 */
 	} else {
 		gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
+		error_code = -EINVAL;
 	}
 	return error_code;
 }
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
index e0bf5e4dce0d..dc6e3e6269c3 100644
--- a/fs/orangefs/protocol.h
+++ b/fs/orangefs/protocol.h
@@ -395,13 +395,6 @@ struct ORANGEFS_dev_map_desc {
 
 /* gossip.h *****************************************************************/
 
-#ifdef GOSSIP_DISABLE_DEBUG
-#define gossip_debug(mask, fmt, ...)					\
-do {									\
-	if (0)								\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
-} while (0)
-#else
 extern __u64 orangefs_gossip_debug_mask;
 
 /* try to avoid function call overhead by checking masks in macro */
@@ -410,13 +403,5 @@ do {									\
 	if (orangefs_gossip_debug_mask & (mask))			\
 		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
 } while (0)
-#endif /* GOSSIP_DISABLE_DEBUG */
-
-/* do file and line number printouts w/ the GNU preprocessor */
-#define gossip_ldebug(mask, fmt, ...)					\
-	gossip_debug(mask, "%s: " fmt, __func__, ##__VA_ARGS__)
 
 #define gossip_err pr_err
-#define gossip_lerr(fmt, ...)						\
-	gossip_err("%s line %d: " fmt,					\
-		   __FILE__, __LINE__, ##__VA_ARGS__)
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 62d49e53061c..3ae5fdba0225 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -335,7 +335,7 @@ static int orangefs_encode_fh(struct inode *inode,
 	struct orangefs_object_kref refn;
 
 	if (*max_len < len) {
-		gossip_lerr("fh buffer is too small for encoding\n");
+		gossip_err("fh buffer is too small for encoding\n");
 		*max_len = len;
 		type = 255;
 		goto out;
@@ -383,7 +383,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname)
 	op->upcall.req.fs_umount.id = id;
 	op->upcall.req.fs_umount.fs_id = fs_id;
 	strncpy(op->upcall.req.fs_umount.orangefs_config_server,
-	    devname, ORANGEFS_MAX_SERVER_ADDR_LEN);
+	    devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 	r = service_operation(op, "orangefs_fs_umount", 0);
 	/* Not much to do about an error here. */
 	if (r)
@@ -478,7 +478,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
 		devname,
-		ORANGEFS_MAX_SERVER_ADDR_LEN);
+		ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 
 	gossip_debug(GOSSIP_SUPER_DEBUG,
 		     "Attempting ORANGEFS Mount via host %s\n",
@@ -520,7 +520,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 	 */
 	strncpy(ORANGEFS_SB(sb)->devname,
 		devname,
-		ORANGEFS_MAX_SERVER_ADDR_LEN);
+		ORANGEFS_MAX_SERVER_ADDR_LEN - 1);
 
 	/* mount_pending must be cleared */
 	ORANGEFS_SB(sb)->mount_pending = 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4be761c1a03d..eea09f6d8830 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -181,8 +181,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 	 * if request is to read from zero offset, reset iterator to first
 	 * record as it might have been already advanced by previous requests
 	 */
-	if (*ppos == 0)
+	if (*ppos == 0) {
 		m->index = 0;
+		m->version = 0;
+		m->count = 0;
+	}
 
 	/* Don't assume *ppos is where we left it */
 	if (unlikely(*ppos != m->read_pos)) {