52 files changed, 21631 insertions, 9313 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index ebba3b18e5da..682bd8ec2c10 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,13 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config AFS_FS
 	tristate "Andrew File System support (AFS)"
 	depends on INET
 	select AF_RXRPC
 	select DNS_RESOLVER
+	select NETFS_SUPPORT
+	select CRYPTO_KRB5
 	help
 	  If you say Y here, you will get an experimental Andrew File System
 	  driver. It currently only supports unsecured read-only AFS access.
 
-	  See <file:Documentation/filesystems/afs.txt> for more information.
+	  See <file:Documentation/filesystems/afs.rst> for more information.
 
 	  If unsure, say N.
 
@@ -17,7 +20,7 @@ config AFS_DEBUG
 	help
 	  Say Y here to make runtime controllable debugging messages appear.
 
-	  See <file:Documentation/filesystems/afs.txt> for more information.
+	  See <file:Documentation/filesystems/afs.rst> for more information.
 
 	  If unsure, say N.
 
@@ -27,3 +30,15 @@ config AFS_FSCACHE
 	help
 	  Say Y here if you want AFS data to be cached locally on disk through
 	  the generic filesystem cache manager
+
+config AFS_DEBUG_CURSOR
+	bool "AFS server cursor debugging"
+	depends on AFS_FS
+	help
+	  Say Y here to cause the contents of a server cursor to be dumped to
+	  the dmesg log if the server rotation algorithm fails to successfully
+	  contact a server.
+
+	  See <file:Documentation/filesystems/afs.rst> for more information.
+
+	  If unsure, say N.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 4f64b95d57bd..b49b8fe682f3 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -1,32 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Red Hat Linux AFS client.
 #
 
-afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
-
-kafs-objs := \
-	$(afs-cache-y) \
+kafs-y := \
+	addr_list.o \
+	addr_prefs.o \
 	callback.o \
 	cell.o \
+	cm_security.o \
 	cmservice.o \
 	dir.o \
+	dir_edit.o \
+	dir_search.o \
+	dir_silly.o \
+	dynroot.o \
 	file.o \
 	flock.o \
 	fsclient.o \
+	fs_operation.o \
+	fs_probe.o \
 	inode.o \
 	main.o \
 	misc.o \
 	mntpt.o \
-	proc.o \
+	rotate.o \
 	rxrpc.o \
 	security.o \
 	server.o \
+	server_list.o \
 	super.o \
-	netdevices.o \
+	validation.o \
 	vlclient.o \
-	vlocation.o \
-	vnode.o \
+	vl_alias.o \
+	vl_list.o \
+	vl_probe.o \
+	vl_rotate.o \
 	volume.o \
-	write.o
+	write.o \
+	xattr.o \
+	yfsclient.o
 
+kafs-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_AFS_FS)  := kafs.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000000..e941da5b6dd9
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+static void afs_free_addrlist(struct rcu_head *rcu)
+{
+	struct afs_addr_list *alist = container_of(rcu, struct afs_addr_list, rcu);
+	unsigned int i;
+
+	for (i = 0; i < alist->nr_addrs; i++)
+		rxrpc_kernel_put_peer(alist->addrs[i].peer);
+	trace_afs_alist(alist->debug_id, refcount_read(&alist->usage), afs_alist_trace_free);
+	kfree(alist);
+}
+
+/*
+ * Release an address list.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
+{
+	unsigned int debug_id;
+	bool dead;
+	int r;
+
+	if (!alist)
+		return;
+	debug_id = alist->debug_id;
+	dead = __refcount_dec_and_test(&alist->usage, &r);
+	trace_afs_alist(debug_id, r - 1, reason);
+	if (dead)
+		call_rcu(&alist->rcu, afs_free_addrlist);
+}
+
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason)
+{
+	int r;
+
+	if (alist) {
+		__refcount_inc(&alist->usage, &r);
+		trace_afs_alist(alist->debug_id, r + 1, reason);
+	}
+	return alist;
+}
+
+/*
+ * Allocate an address list.
+ */
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr)
+{
+	struct afs_addr_list *alist;
+	static atomic_t debug_id;
+
+	_enter("%u", nr);
+
+	if (nr > AFS_MAX_ADDRESSES)
+		nr = AFS_MAX_ADDRESSES;
+
+	alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL);
+	if (!alist)
+		return NULL;
+
+	refcount_set(&alist->usage, 1);
+	alist->max_addrs = nr;
+	alist->debug_id = atomic_inc_return(&debug_id);
+	trace_afs_alist(alist->debug_id, 1, afs_alist_trace_alloc);
+	return alist;
+}
+
+/*
+ * Parse a text string consisting of delimited addresses.
+ */
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+					       const char *text, size_t len,
+					       char delim,
+					       unsigned short service,
+					       unsigned short port)
+{
+	struct afs_vlserver_list *vllist;
+	struct afs_addr_list *alist;
+	const char *p, *end = text + len;
+	const char *problem;
+	unsigned int nr = 0;
+	int ret = -ENOMEM;
+
+	_enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+	if (!len) {
+		_leave(" = -EDESTADDRREQ [empty]");
+		return ERR_PTR(-EDESTADDRREQ);
+	}
+
+	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+		delim = ',';
+
+	/* Count the addresses */
+	p = text;
+	do {
+		if (!*p) {
+			problem = "nul";
+			goto inval;
+		}
+		if (*p == delim)
+			continue;
+		nr++;
+		if (*p == '[') {
+			p++;
+			if (p == end) {
+				problem = "brace1";
+				goto inval;
+			}
+			p = memchr(p, ']', end - p);
+			if (!p) {
+				problem = "brace2";
+				goto inval;
+			}
+			p++;
+			if (p >= end)
+				break;
+		}
+
+		p = memchr(p, delim, end - p);
+		if (!p)
+			break;
+		p++;
+	} while (p < end);
+
+	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
+
+	vllist = afs_alloc_vlserver_list(1);
+	if (!vllist)
+		return ERR_PTR(-ENOMEM);
+
+	vllist->nr_servers = 1;
+	vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+	if (!vllist->servers[0].server)
+		goto error_vl;
+
+	alist = afs_alloc_addrlist(nr);
+	if (!alist)
+		goto error;
+
+	/* Extract the addresses */
+	p = text;
+	do {
+		const char *q, *stop;
+		unsigned int xport = port;
+		__be32 x[4];
+		int family;
+
+		if (*p == delim) {
+			p++;
+			continue;
+		}
+
+		if (*p == '[') {
+			p++;
+			q = memchr(p, ']', end - p);
+		} else {
+			for (q = p; q < end; q++)
+				if (*q == '+' || *q == delim)
+					break;
+		}
+
+		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
+			family = AF_INET;
+		} else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
+			family = AF_INET6;
+		} else {
+			problem = "family";
+			goto bad_address;
+		}
+
+		p = q;
+		if (stop != p) {
+			problem = "nostop";
+			goto bad_address;
+		}
+
+		if (q < end && *q == ']')
+			p++;
+
+		if (p < end) {
+			if (*p == '+') {
+				/* Port number specification "+1234" */
+				xport = 0;
+				p++;
+				if (p >= end || !isdigit(*p)) {
+					problem = "port";
+					goto bad_address;
+				}
+				do {
+					xport *= 10;
+					xport += *p - '0';
+					if (xport > 65535) {
+						problem = "pval";
+						goto bad_address;
+					}
+					p++;
+				} while (p < end && isdigit(*p));
+			} else if (*p == delim) {
+				p++;
+			} else {
+				problem = "weird";
+				goto bad_address;
+			}
+		}
+
+		if (family == AF_INET)
+			ret = afs_merge_fs_addr4(net, alist, x[0], xport);
+		else
+			ret = afs_merge_fs_addr6(net, alist, x, xport);
+		if (ret < 0)
+			goto error;
+
+	} while (p < end);
+
+	rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
+	_leave(" = [nr %u]", alist->nr_addrs);
+	return vllist;
+
+inval:
+	_leave(" = -EINVAL [%s %zu %*.*s]",
+	       problem, p - text, (int)len, (int)len, text);
+	return ERR_PTR(-EINVAL);
+bad_address:
+	_leave(" = -EINVAL [%s %zu %*.*s]",
+	       problem, p - text, (int)len, (int)len, text);
+	ret = -EINVAL;
+error:
+	afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
+error_vl:
+	afs_put_vlserverlist(net, vllist);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Perform a DNS query for VL servers and build a up an address list.
+ */
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+	struct afs_vlserver_list *vllist;
+	char *result = NULL;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len,
+			"srv=1", &result, _expiry, true);
+	if (ret < 0) {
+		_leave(" = %d [dns]", ret);
+		return ERR_PTR(ret);
+	}
+
+	if (*_expiry == 0)
+		*_expiry = ktime_get_real_seconds() + 60;
+
+	if (ret > 1 && result[0] == 0)
+		vllist = afs_extract_vlserver_list(cell, result, ret);
+	else
+		vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+					      VL_SERVICE, AFS_VL_PORT);
+	kfree(result);
+	if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+		pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+	return vllist;
+}
+
+/*
+ * Merge an IPv4 entry into a fileserver address list.
+ */
+int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist,
+		       __be32 xdr, u16 port)
+{
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_peer *peer;
+	int i;
+
+	if (alist->nr_addrs >= alist->max_addrs)
+		return 0;
+
+	srx.srx_family = AF_RXRPC;
+	srx.transport_type = SOCK_DGRAM;
+	srx.transport_len = sizeof(srx.transport.sin);
+	srx.transport.sin.sin_family = AF_INET;
+	srx.transport.sin.sin_port = htons(port);
+	srx.transport.sin.sin_addr.s_addr = xdr;
+
+	peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+	if (!peer)
+		return -ENOMEM;
+
+	for (i = 0; i < alist->nr_ipv4; i++) {
+		if (peer == alist->addrs[i].peer) {
+			rxrpc_kernel_put_peer(peer);
+			return 0;
+		}
+		if (peer <= alist->addrs[i].peer)
+			break;
+	}
+
+	if (i < alist->nr_addrs)
+		memmove(alist->addrs + i + 1,
+			alist->addrs + i,
+			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+	alist->addrs[i].peer = peer;
+	alist->nr_ipv4++;
+	alist->nr_addrs++;
+	return 0;
+}
+
+/*
+ * Merge an IPv6 entry into a fileserver address list.
+ */
+int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist,
+		       __be32 *xdr, u16 port)
+{
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_peer *peer;
+	int i;
+
+	if (alist->nr_addrs >= alist->max_addrs)
+		return 0;
+
+	srx.srx_family = AF_RXRPC;
+	srx.transport_type = SOCK_DGRAM;
+	srx.transport_len = sizeof(srx.transport.sin6);
+	srx.transport.sin6.sin6_family = AF_INET6;
+	srx.transport.sin6.sin6_port = htons(port);
+	memcpy(&srx.transport.sin6.sin6_addr, xdr, 16);
+
+	peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL);
+	if (!peer)
+		return -ENOMEM;
+
+	for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+		if (peer == alist->addrs[i].peer) {
+			rxrpc_kernel_put_peer(peer);
+			return 0;
+		}
+		if (peer <= alist->addrs[i].peer)
+			break;
+	}
+
+	if (i < alist->nr_addrs)
+		memmove(alist->addrs + i + 1,
+			alist->addrs + i,
+			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+	alist->addrs[i].peer = peer;
+	alist->nr_addrs++;
+	return 0;
+}
+
+/*
+ * Set the app data on the rxrpc peers an address list points to
+ */
+void afs_set_peer_appdata(struct afs_server *server,
+			  struct afs_addr_list *old_alist,
+			  struct afs_addr_list *new_alist)
+{
+	unsigned long data = (unsigned long)server;
+	int n = 0, o = 0;
+
+	if (!old_alist) {
+		/* New server.  Just set all. */
+		for (; n < new_alist->nr_addrs; n++)
+			rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+		return;
+	}
+	if (!new_alist) {
+		/* Dead server.  Just remove all. */
+		for (; o < old_alist->nr_addrs; o++)
+			rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
+		return;
+	}
+
+	/* Walk through the two lists simultaneously, setting new peers and
+	 * clearing old ones.  The two lists are ordered by pointer to peer
+	 * record.
+	 */
+	while (n < new_alist->nr_addrs && o < old_alist->nr_addrs) {
+		struct rxrpc_peer *pn = new_alist->addrs[n].peer;
+		struct rxrpc_peer *po = old_alist->addrs[o].peer;
+
+		if (pn == po)
+			continue;
+		if (pn < po) {
+			rxrpc_kernel_set_peer_data(pn, data);
+			n++;
+		} else {
+			rxrpc_kernel_set_peer_data(po, 0);
+			o++;
+		}
+	}
+
+	if (n < new_alist->nr_addrs)
+		for (; n < new_alist->nr_addrs; n++)
+			rxrpc_kernel_set_peer_data(new_alist->addrs[n].peer, data);
+	if (o < old_alist->nr_addrs)
+		for (; o < old_alist->nr_addrs; o++)
+			rxrpc_kernel_set_peer_data(old_alist->addrs[o].peer, 0);
+}
diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c
new file mode 100644
index 000000000000..133736412c3d
--- /dev/null
+++ b/fs/afs/addr_prefs.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Address preferences management
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": addr_prefs: " fmt
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
+#include <linux/seq_file.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
+{
+	return afs_net(seq_file_single_net(m));
+}
+
+/*
+ * Split a NUL-terminated string up to the first newline around spaces.  The
+ * source string will be modified to have NUL-terminations inserted.
+ */
+static int afs_split_string(char **pbuf, char *strv[], unsigned int maxstrv)
+{
+	unsigned int count = 0;
+	char *p = *pbuf;
+
+	maxstrv--; /* Allow for terminal NULL */
+	for (;;) {
+		/* Skip over spaces */
+		while (isspace(*p)) {
+			if (*p == '\n') {
+				p++;
+				break;
+			}
+			p++;
+		}
+		if (!*p)
+			break;
+
+		/* Mark start of word */
+		if (count >= maxstrv) {
+			pr_warn("Too many elements in string\n");
+			return -EINVAL;
+		}
+		strv[count++] = p;
+
+		/* Skip over word */
+		while (!isspace(*p) && *p)
+			p++;
+		if (!*p)
+			break;
+
+		/* Mark end of word */
+		if (*p == '\n') {
+			*p++ = 0;
+			break;
+		}
+		*p++ = 0;
+	}
+
+	*pbuf = p;
+	strv[count] = NULL;
+	return count;
+}
+
+/*
+ * Parse an address with an optional subnet mask.
+ */
+static int afs_parse_address(char *p, struct afs_addr_preference *pref)
+{
+	const char *stop;
+	unsigned long mask, tmp;
+	char *end = p + strlen(p);
+	bool bracket = false;
+
+	if (*p == '[') {
+		p++;
+		bracket = true;
+	}
+
+#if 0
+	if (*p == '[') {
+		p++;
+		q = memchr(p, ']', end - p);
+		if (!q) {
+			pr_warn("Can't find closing ']'\n");
+			return -EINVAL;
+		}
+	} else {
+		for (q = p; q < end; q++)
+			if (*q == '/')
+				break;
+	}
+#endif
+
+	if (in4_pton(p, end - p, (u8 *)&pref->ipv4_addr, -1, &stop)) {
+		pref->family = AF_INET;
+		mask = 32;
+	} else if (in6_pton(p, end - p, (u8 *)&pref->ipv6_addr, -1, &stop)) {
+		pref->family = AF_INET6;
+		mask = 128;
+	} else {
+		pr_warn("Can't determine address family\n");
+		return -EINVAL;
+	}
+
+	p = (char *)stop;
+	if (bracket) {
+		if (*p != ']') {
+			pr_warn("Can't find closing ']'\n");
+			return -EINVAL;
+		}
+		p++;
+	}
+
+	if (*p == '/') {
+		p++;
+		tmp = simple_strtoul(p, &p, 10);
+		if (tmp > mask) {
+			pr_warn("Subnet mask too large\n");
+			return -EINVAL;
+		}
+		if (tmp == 0) {
+			pr_warn("Subnet mask too small\n");
+			return -EINVAL;
+		}
+		mask = tmp;
+	}
+
+	if (*p) {
+		pr_warn("Invalid address\n");
+		return -EINVAL;
+	}
+
+	pref->subnet_mask = mask;
+	return 0;
+}
+
+enum cmp_ret {
+	CONTINUE_SEARCH,
+	INSERT_HERE,
+	EXACT_MATCH,
+	SUBNET_MATCH,
+};
+
+/*
+ * See if a candidate address matches a listed address.
+ */
+static enum cmp_ret afs_cmp_address_pref(const struct afs_addr_preference *a,
+					 const struct afs_addr_preference *b)
+{
+	int subnet = min(a->subnet_mask, b->subnet_mask);
+	const __be32 *pa, *pb;
+	u32 mask, na, nb;
+	int diff;
+
+	if (a->family != b->family)
+		return INSERT_HERE;
+
+	switch (a->family) {
+	case AF_INET6:
+		pa = a->ipv6_addr.s6_addr32;
+		pb = b->ipv6_addr.s6_addr32;
+		break;
+	case AF_INET:
+		pa = &a->ipv4_addr.s_addr;
+		pb = &b->ipv4_addr.s_addr;
+		break;
+	}
+
+	while (subnet > 32) {
+		diff = ntohl(*pa++) - ntohl(*pb++);
+		if (diff < 0)
+			return INSERT_HERE; /* a<b */
+		if (diff > 0)
+			return CONTINUE_SEARCH; /* a>b */
+		subnet -= 32;
+	}
+
+	if (subnet == 0)
+		return EXACT_MATCH;
+
+	mask = 0xffffffffU << (32 - subnet);
+	na = ntohl(*pa);
+	nb = ntohl(*pb);
+	diff = (na & mask) - (nb & mask);
+	//kdebug("diff %08x %08x %08x %d", na, nb, mask, diff);
+	if (diff < 0)
+		return INSERT_HERE; /* a<b */
+	if (diff > 0)
+		return CONTINUE_SEARCH; /* a>b */
+	if (a->subnet_mask == b->subnet_mask)
+		return EXACT_MATCH;
+	if (a->subnet_mask > b->subnet_mask)
+		return SUBNET_MATCH; /* a binds tighter than b */
+	return CONTINUE_SEARCH; /* b binds tighter than a */
+}
+
+/*
+ * Insert an address preference.
+ */
+static int afs_insert_address_pref(struct afs_addr_preference_list **_preflist,
+				   struct afs_addr_preference *pref,
+				   int index)
+{
+	struct afs_addr_preference_list *preflist = *_preflist, *old = preflist;
+	size_t size, max_prefs;
+
+	_enter("{%u/%u/%u},%u", preflist->ipv6_off, preflist->nr, preflist->max_prefs, index);
+
+	if (preflist->nr == 255)
+		return -ENOSPC;
+	if (preflist->nr >= preflist->max_prefs) {
+		max_prefs = preflist->max_prefs + 1;
+		size = struct_size(preflist, prefs, max_prefs);
+		size = roundup_pow_of_two(size);
+		max_prefs = min_t(size_t, (size - sizeof(*preflist)) / sizeof(*pref), 255);
+		preflist = kmalloc(size, GFP_KERNEL);
+		if (!preflist)
+			return -ENOMEM;
+		*preflist = **_preflist;
+		preflist->max_prefs = max_prefs;
+		*_preflist = preflist;
+
+		if (index < preflist->nr)
+			memcpy(preflist->prefs + index + 1, old->prefs + index,
+			       sizeof(*pref) * (preflist->nr - index));
+		if (index > 0)
+			memcpy(preflist->prefs, old->prefs, sizeof(*pref) * index);
+	} else {
+		if (index < preflist->nr)
+			memmove(preflist->prefs + index + 1, preflist->prefs + index,
+			       sizeof(*pref) * (preflist->nr - index));
+	}
+
+	preflist->prefs[index] = *pref;
+	preflist->nr++;
+	if (pref->family == AF_INET)
+		preflist->ipv6_off++;
+	return 0;
+}
+
+/*
+ * Add an address preference.
+ *	echo "add <proto> <IP>[/<mask>] <prior>" >/proc/fs/afs/addr_prefs
+ */
+static int afs_add_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+				int argc, char **argv)
+{
+	struct afs_addr_preference_list *preflist = *_preflist;
+	struct afs_addr_preference pref;
+	enum cmp_ret cmp;
+	int ret, i, stop;
+
+	if (argc != 3) {
+		pr_warn("Wrong number of params\n");
+		return -EINVAL;
+	}
+
+	if (strcmp(argv[0], "udp") != 0) {
+		pr_warn("Unsupported protocol\n");
+		return -EINVAL;
+	}
+
+	ret = afs_parse_address(argv[1], &pref);
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtou16(argv[2], 10, &pref.prio);
+	if (ret < 0) {
+		pr_warn("Invalid priority\n");
+		return ret;
+	}
+
+	if (pref.family == AF_INET) {
+		i = 0;
+		stop = preflist->ipv6_off;
+	} else {
+		i = preflist->ipv6_off;
+		stop = preflist->nr;
+	}
+
+	for (; i < stop; i++) {
+		cmp = afs_cmp_address_pref(&pref, &preflist->prefs[i]);
+		switch (cmp) {
+		case CONTINUE_SEARCH:
+			continue;
+		case INSERT_HERE:
+		case SUBNET_MATCH:
+			return afs_insert_address_pref(_preflist, &pref, i);
+		case EXACT_MATCH:
+			preflist->prefs[i].prio = pref.prio;
+			return 0;
+		}
+	}
+
+	return afs_insert_address_pref(_preflist, &pref, i);
+}
+
+/*
+ * Delete an address preference.
+ */
+static int afs_delete_address_pref(struct afs_addr_preference_list **_preflist,
+				   int index)
+{
+	struct afs_addr_preference_list *preflist = *_preflist;
+
+	_enter("{%u/%u/%u},%u", preflist->ipv6_off, preflist->nr, preflist->max_prefs, index);
+
+	if (preflist->nr == 0)
+		return -ENOENT;
+
+	if (index < preflist->nr - 1)
+		memmove(preflist->prefs + index, preflist->prefs + index + 1,
+			sizeof(preflist->prefs[0]) * (preflist->nr - index - 1));
+
+	if (index < preflist->ipv6_off)
+		preflist->ipv6_off--;
+	preflist->nr--;
+	return 0;
+}
+
+/*
+ * Delete an address preference.
+ *	echo "del <proto> <IP>[/<mask>]" >/proc/fs/afs/addr_prefs
+ */
+static int afs_del_address_pref(struct afs_net *net, struct afs_addr_preference_list **_preflist,
+				int argc, char **argv)
+{
+	struct afs_addr_preference_list *preflist = *_preflist;
+	struct afs_addr_preference pref;
+	enum cmp_ret cmp;
+	int ret, i, stop;
+
+	if (argc != 2) {
+		pr_warn("Wrong number of params\n");
+		return -EINVAL;
+	}
+
+	if (strcmp(argv[0], "udp") != 0) {
+		pr_warn("Unsupported protocol\n");
+		return -EINVAL;
+	}
+
+	ret = afs_parse_address(argv[1], &pref);
+	if (ret < 0)
+		return ret;
+
+	if (pref.family == AF_INET) {
+		i = 0;
+		stop = preflist->ipv6_off;
+	} else {
+		i = preflist->ipv6_off;
+		stop = preflist->nr;
+	}
+
+	for (; i < stop; i++) {
+		cmp = afs_cmp_address_pref(&pref, &preflist->prefs[i]);
+		switch (cmp) {
+		case CONTINUE_SEARCH:
+			continue;
+		case INSERT_HERE:
+		case SUBNET_MATCH:
+			return 0;
+		case EXACT_MATCH:
+			return afs_delete_address_pref(_preflist, i);
+		}
+	}
+
+	return -ENOANO;
+}
+
+/*
+ * Handle writes to /proc/fs/afs/addr_prefs
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size)
+{
+	struct afs_addr_preference_list *preflist, *old;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net_single(m);
+	size_t psize;
+	char *argv[5];
+	int ret, argc, max_prefs;
+
+	inode_lock(file_inode(file));
+
+	/* Allocate a candidate new list and initialise it from the old. */
+	old = rcu_dereference_protected(net->address_prefs,
+					lockdep_is_held(&file_inode(file)->i_rwsem));
+
+	if (old)
+		max_prefs = old->nr + 1;
+	else
+		max_prefs = 1;
+
+	psize = struct_size(old, prefs, max_prefs);
+	psize = roundup_pow_of_two(psize);
+	max_prefs = min_t(size_t, (psize - sizeof(*old)) / sizeof(old->prefs[0]), 255);
+
+	ret = -ENOMEM;
+	preflist = kmalloc(struct_size(preflist, prefs, max_prefs), GFP_KERNEL);
+	if (!preflist)
+		goto done;
+
+	if (old)
+		memcpy(preflist, old, struct_size(preflist, prefs, old->nr));
+	else
+		memset(preflist, 0, sizeof(*preflist));
+	preflist->max_prefs = max_prefs;
+
+	do {
+		argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv));
+		if (argc < 0) {
+			ret = argc;
+			goto done;
+		}
+		if (argc < 2)
+			goto inval;
+
+		if (strcmp(argv[0], "add") == 0)
+			ret = afs_add_address_pref(net, &preflist, argc - 1, argv + 1);
+		else if (strcmp(argv[0], "del") == 0)
+			ret = afs_del_address_pref(net, &preflist, argc - 1, argv + 1);
+		else
+			goto inval;
+		if (ret < 0)
+			goto done;
+	} while (*buf);
+
+	preflist->version++;
+	rcu_assign_pointer(net->address_prefs, preflist);
+	/* Store prefs before version */
+	smp_store_release(&net->address_pref_version, preflist->version);
+	kfree_rcu(old, rcu);
+	preflist = NULL;
+	ret = 0;
+
+done:
+	kfree(preflist);
+	inode_unlock(file_inode(file));
+	_leave(" = %d", ret);
+	return ret;
+
+inval:
+	pr_warn("Invalid Command\n");
+	ret = -EINVAL;
+	goto done;
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed.  The caller must hold the RCU read lock.
+ */
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist)
+{
+	const struct afs_addr_preference_list *preflist =
+		rcu_dereference(net->address_prefs);
+	const struct sockaddr_in6 *sin6;
+	const struct sockaddr_in *sin;
+	const struct sockaddr *sa;
+	struct afs_addr_preference test;
+	enum cmp_ret cmp;
+	int i, j;
+
+	if (!preflist || !preflist->nr || !alist->nr_addrs ||
+	    smp_load_acquire(&alist->addr_pref_version) == preflist->version)
+		return;
+
+	test.family = AF_INET;
+	test.subnet_mask = 32;
+	test.prio = 0;
+	for (i = 0; i < alist->nr_ipv4; i++) {
+		sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+		sin = (const struct sockaddr_in *)sa;
+		test.ipv4_addr = sin->sin_addr;
+		for (j = 0; j < preflist->ipv6_off; j++) {
+			cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+			switch (cmp) {
+			case CONTINUE_SEARCH:
+				continue;
+			case INSERT_HERE:
+				break;
+			case EXACT_MATCH:
+			case SUBNET_MATCH:
+				WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+				break;
+			}
+		}
+	}
+
+	test.family = AF_INET6;
+	test.subnet_mask = 128;
+	test.prio = 0;
+	for (; i < alist->nr_addrs; i++) {
+		sa = rxrpc_kernel_remote_addr(alist->addrs[i].peer);
+		sin6 = (const struct sockaddr_in6 *)sa;
+		test.ipv6_addr = sin6->sin6_addr;
+		for (j = preflist->ipv6_off; j < preflist->nr; j++) {
+			cmp = afs_cmp_address_pref(&test, &preflist->prefs[j]);
+			switch (cmp) {
+			case CONTINUE_SEARCH:
+				continue;
+			case INSERT_HERE:
+				break;
+			case EXACT_MATCH:
+			case SUBNET_MATCH:
+				WRITE_ONCE(alist->addrs[i].prio, preflist->prefs[j].prio);
+				break;
+			}
+		}
+	}
+
+	smp_store_release(&alist->addr_pref_version, preflist->version);
+}
+
+/*
+ * Mark the priorities on an address list if the address preferences table has
+ * changed.  Avoid taking the RCU read lock if we can.
+ */
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist)
+{
+	if (!net->address_prefs ||
+	    /* Load version before prefs */
+	    smp_load_acquire(&net->address_pref_version) == alist->addr_pref_version)
+		return;
+
+	rcu_read_lock();
+	afs_get_address_preferences_rcu(net, alist);
+	rcu_read_unlock();
+}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 3c462ff6db63..ec3db00bd081 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* AFS common types
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #ifndef AFS_H
@@ -14,15 +10,21 @@
 
 #include <linux/in.h>
 
-#define AFS_MAXCELLNAME	64		/* maximum length of a cell name */
-#define AFS_MAXVOLNAME	64		/* maximum length of a volume name */
-#define AFSNAMEMAX	256		/* maximum length of a filename plus NUL */
-#define AFSPATHMAX	1024		/* maximum length of a pathname plus NUL */
-#define AFSOPAQUEMAX	1024		/* maximum length of an opaque field */
+#define AFS_MAXCELLNAME		253  	/* Maximum length of a cell name (DNS limited) */
+#define AFS_MAXVOLNAME		64  	/* Maximum length of a volume name */
+#define AFS_MAXNSERVERS		8   	/* Maximum servers in a basic volume record */
+#define AFS_NMAXNSERVERS	13  	/* Maximum servers in a N/U-class volume record */
+#define AFS_MAXTYPES		3	/* Maximum number of volume types */
+#define AFSNAMEMAX		256 	/* Maximum length of a filename plus NUL */
+#define AFSPATHMAX		1024	/* Maximum length of a pathname plus NUL */
+#define AFSOPAQUEMAX		1024	/* Maximum length of an opaque field */
+
+#define AFS_VL_MAX_LIFESPAN	120
+#define AFS_PROBE_MAX_LIFESPAN	30
 
-typedef unsigned			afs_volid_t;
-typedef unsigned			afs_vnodeid_t;
-typedef unsigned long long		afs_dataversion_t;
+typedef u64			afs_volid_t;
+typedef u64			afs_vnodeid_t;
+typedef u64			afs_dataversion_t;
 
 typedef enum {
 	AFSVL_RWVOL,			/* read/write volume */
@@ -49,8 +51,9 @@ typedef enum {
  */
 struct afs_fid {
 	afs_volid_t	vid;		/* volume ID */
-	afs_vnodeid_t	vnode;		/* file index within volume */
-	unsigned	unique;		/* unique ID number (file index version) */
+	afs_vnodeid_t	vnode;		/* Lower 64-bits of file index within volume */
+	u32		vnode_hi;	/* Upper 32-bits of file index */
+	u32		unique;		/* unique ID number (file index version) */
 };
 
 /*
@@ -64,14 +67,27 @@ typedef enum {
 } afs_callback_type_t;
 
 struct afs_callback {
-	struct afs_fid		fid;		/* file identifier */
-	unsigned		version;	/* callback version */
-	unsigned		expiry;		/* time at which expires */
-	afs_callback_type_t	type;		/* type of callback */
+	time64_t		expires_at;	/* Time at which expires */
+	//unsigned		version;	/* Callback version */
+	//afs_callback_type_t	type;		/* Type of callback */
+};
+
+struct afs_callback_break {
+	struct afs_fid		fid;		/* File identifier */
+	//struct afs_callback	cb;		/* Callback details */
 };
 
 #define AFSCBMAX 50	/* maximum callbacks transferred per bulk op */
 
+struct afs_uuid {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+	__s8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__s8		clock_seq_low;			/* clock seq low */
+	__s8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*
  * AFS volume information
  */
@@ -111,23 +127,28 @@ typedef u32 afs_access_t;
  * AFS file status information
  */
 struct afs_file_status {
-	unsigned		if_version;	/* interface version */
-#define AFS_FSTATUS_VERSION	1
-
-	afs_file_type_t		type;		/* file type */
-	unsigned		nlink;		/* link count */
 	u64			size;		/* file size */
 	afs_dataversion_t	data_version;	/* current data version */
-	u32			author;		/* author ID */
-	kuid_t			owner;		/* owner ID */
-	kgid_t			group;		/* group ID */
+	struct timespec64	mtime_client;	/* Last time client changed data */
+	struct timespec64	mtime_server;	/* Last time server changed data */
+	s64			author;		/* author ID */
+	s64			owner;		/* owner ID */
+	s64			group;		/* group ID */
 	afs_access_t		caller_access;	/* access rights for authenticated caller */
 	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
-	struct afs_fid		parent;		/* parent dir ID for non-dirs only */
-	time_t			mtime_client;	/* last time client changed data */
-	time_t			mtime_server;	/* last time server changed data */
+	afs_file_type_t		type;		/* file type */
+	u32			nlink;		/* link count */
 	s32			lock_count;	/* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
+	u32			abort_code;	/* Abort if bulk-fetching this failed */
+};
+
+struct afs_status_cb {
+	struct afs_file_status	status;
+	struct afs_callback	callback;
+	bool			have_status;	/* True if status record was retrieved */
+	bool			have_cb;	/* True if cb record was retrieved */
+	bool			have_error;	/* True if status.abort_code indicates an error */
 };
 
 /*
@@ -144,27 +165,42 @@ struct afs_file_status {
  * AFS volume synchronisation information
  */
 struct afs_volsync {
-	time_t			creation;	/* volume creation time */
+	time64_t		creation;	/* Volume creation time (or TIME64_MIN) */
+	time64_t		update;		/* Volume update time (or TIME64_MIN) */
 };
 
 /*
  * AFS volume status record
  */
 struct afs_volume_status {
-	u32			vid;		/* volume ID */
-	u32			parent_id;	/* parent volume ID */
+	afs_volid_t		vid;		/* volume ID */
+	afs_volid_t		parent_id;	/* parent volume ID */
 	u8			online;		/* true if volume currently online and available */
 	u8			in_service;	/* true if volume currently in service */
 	u8			blessed;	/* same as in_service */
 	u8			needs_salvage;	/* true if consistency checking required */
 	u32			type;		/* volume type (afs_voltype_t) */
-	u32			min_quota;	/* minimum space set aside (blocks) */
-	u32			max_quota;	/* maximum space this volume may occupy (blocks) */
-	u32			blocks_in_use;	/* space this volume currently occupies (blocks) */
-	u32			part_blocks_avail; /* space available in volume's partition */
-	u32			part_max_blocks; /* size of volume's partition */
+	u64			min_quota;	/* minimum space set aside (blocks) */
+	u64			max_quota;	/* maximum space this volume may occupy (blocks) */
+	u64			blocks_in_use;	/* space this volume currently occupies (blocks) */
+	u64			part_blocks_avail; /* space available in volume's partition */
+	u64			part_max_blocks; /* size of volume's partition */
+	s64			vol_copy_date;
+	s64			vol_backup_date;
 };
 
 #define AFS_BLOCK_SIZE	1024
 
+/*
+ * XDR encoding of UUID in AFS.
+ */
+struct afs_uuid__xdr {
+	__be32		time_low;
+	__be32		time_mid;
+	__be32		time_hi_and_version;
+	__be32		clock_seq_hi_and_reserved;
+	__be32		clock_seq_low;
+	__be32		node[6];
+};
+
 #endif /* AFS_H */
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 255f5dd6040c..565cbe0a8af6 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* AFS Cache Manager definitions
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #ifndef AFS_CM_H
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index eb647323d8f0..20ab344baf9d 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* AFS File Service definitions
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #ifndef AFS_FS_H
@@ -17,8 +13,10 @@
 
 enum AFS_FS_Operations {
 	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
+	FSSTOREACL		= 134,	/* AFS Store file ACL */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
 	FSREMOVEFILE		= 136,	/* AFS Remove a file */
 	FSCREATEFILE		= 137,	/* AFS Create a file */
@@ -31,15 +29,20 @@ enum AFS_FS_Operations {
 	FSGETVOLUMEINFO		= 148,	/* AFS Get information about a volume */
 	FSGETVOLUMESTATUS	= 149,	/* AFS Get volume status information */
 	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
+	FSBULKSTATUS		= 155,	/* AFS Fetch multiple file statuses */
 	FSSETLOCK		= 156,	/* AFS Request a file lock */
 	FSEXTENDLOCK		= 157,	/* AFS Extend a file lock */
 	FSRELEASELOCK		= 158,	/* AFS Release a file lock */
 	FSLOOKUP		= 161,	/* AFS lookup file in directory */
+	FSINLINEBULKSTATUS	= 65536, /* AFS Fetch multiple file statuses with inline errors */
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
 	FSSTOREDATA64		= 65538, /* AFS Store file data */
+	FSGIVEUPALLCALLBACKS	= 65539, /* AFS Give up all outstanding callbacks on a server */
+	FSGETCAPABILITIES	= 65540, /* Probe and get the capabilities of a fileserver */
 };
 
 enum AFS_FS_Errors {
+	VRESTARTING	= -100,	/* Server is restarting */
 	VSALVAGE	= 101,	/* volume needs salvaging */
 	VNOVNODE	= 102,	/* no such file/dir (vnode) */
 	VNOVOL		= 103,	/* no such volume or volume unavailable */
@@ -51,6 +54,9 @@ enum AFS_FS_Errors {
 	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
 	VBUSY		= 110,	/* volume is temporarily unavailable */
 	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
+	VIO		= 112,	/* I/O error in volume */
+	VSALVAGING	= 113,	/* Volume is being salvaged */
+	VRESTRICTED	= 120,	/* Volume is restricted from using  */
 };
 
 #endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 800f607ffaf5..b835e25a2c02 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* AFS Volume Location Service client interface
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #ifndef AFS_VL_H
@@ -16,11 +12,19 @@
 
 #define AFS_VL_PORT		7003	/* volume location service port */
 #define VL_SERVICE		52	/* RxRPC service ID for the Volume Location service */
+#define YFS_VL_SERVICE		2503	/* Service ID for AuriStor upgraded VL service */
+#define YFS_VL_MAXCELLNAME	256  	/* Maximum length of a cell name in YFS protocol */
 
 enum AFSVL_Operations {
-	VLGETENTRYBYID		= 503,	/* AFS Get Cache Entry By ID operation ID */
-	VLGETENTRYBYNAME	= 504,	/* AFS Get Cache Entry By Name operation ID */
-	VLPROBE			= 514,	/* AFS Probe Volume Location Service operation ID */
+	VLGETENTRYBYID		= 503,	/* AFS Get VLDB entry by ID */
+	VLGETENTRYBYNAME	= 504,	/* AFS Get VLDB entry by name */
+	VLPROBE			= 514,	/* AFS probe VL service */
+	VLGETENTRYBYIDU		= 526,	/* AFS Get VLDB entry by ID (UUID-variant) */
+	VLGETENTRYBYNAMEU	= 527,	/* AFS Get VLDB entry by name (UUID-variant) */
+	VLGETADDRSU		= 533,	/* AFS Get addrs for fileserver */
+	YVLGETENDPOINTS		= 64002, /* YFS Get endpoints for file/volume server */
+	YVLGETCELLNAME		= 64014, /* YFS Get actual cell name */
+	VLGETCAPABILITIES	= 65537, /* AFS Get server capabilities */
 };
 
 enum AFSVL_Errors {
@@ -54,6 +58,19 @@ enum AFSVL_Errors {
 	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
 };
 
+enum {
+	YFS_SERVER_INDEX	= 0,
+	YFS_SERVER_UUID		= 1,
+	YFS_SERVER_ENDPOINT	= 2,
+};
+
+enum {
+	YFS_ENDPOINT_IPV4	= 0,
+	YFS_ENDPOINT_IPV6	= 1,
+};
+
+#define YFS_MAXENDPOINTS	16
+
 /*
  * maps to "struct vldbentry" in vvl-spec.pdf
  */
@@ -74,11 +91,48 @@ struct afs_vldbentry {
 		struct in_addr	addr;		/* server address */
 		unsigned	partition;	/* partition ID on this server */
 		unsigned	flags;		/* server specific flags */
-#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
+#define AFS_VLSF_NEWREPSITE	0x0001	/* Ignore all 'non-new' servers */
 #define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
 #define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
 #define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
+#define AFS_VLSF_UUID		0x0010	/* This server is referred to by its UUID */
+#define AFS_VLSF_DONTUSE	0x0020	/* This server ref should be ignored */
 	} servers[8];
 };
 
+#define AFS_VLDB_MAXNAMELEN 65
+
+
+struct afs_ListAddrByAttributes__xdr {
+	__be32			Mask;
+#define AFS_VLADDR_IPADDR	0x1	/* Match by ->ipaddr */
+#define AFS_VLADDR_INDEX	0x2	/* Match by ->index */
+#define AFS_VLADDR_UUID		0x4	/* Match by ->uuid */
+	__be32			ipaddr;
+	__be32			index;
+	__be32			spare;
+	struct afs_uuid__xdr	uuid;
+};
+
+struct afs_uvldbentry__xdr {
+	__be32			name[AFS_VLDB_MAXNAMELEN];
+	__be32			nServers;
+	struct afs_uuid__xdr	serverNumber[AFS_NMAXNSERVERS];
+	__be32			serverUnique[AFS_NMAXNSERVERS];
+	__be32			serverPartition[AFS_NMAXNSERVERS];
+	__be32			serverFlags[AFS_NMAXNSERVERS];
+	__be32			volumeId[AFS_MAXTYPES];
+	__be32			cloneId;
+	__be32			flags;
+	__be32			spares1;
+	__be32			spares2;
+	__be32			spares3;
+	__be32			spares4;
+	__be32			spares5;
+	__be32			spares6;
+	__be32			spares7;
+	__be32			spares8;
+	__be32			spares9;
+};
+
 #endif /* AFS_VL_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
deleted file mode 100644
index 577763c3d88b..000000000000
--- a/fs/afs/cache.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/* AFS caching stuff
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include "internal.h"
-
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
-				       void *buffer, uint16_t buflen);
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
-				       void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
-						      const void *buffer,
-						      uint16_t buflen);
-
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t buflen);
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_vlocation_cache_check_aux(
-	void *cookie_netfs_data, const void *buffer, uint16_t buflen);
-
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
-					 void *buffer, uint16_t buflen);
-
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t buflen);
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
-				     uint64_t *size);
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
-					void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
-						       const void *buffer,
-						       uint16_t buflen);
-static void afs_vnode_cache_now_uncached(void *cookie_netfs_data);
-
-struct fscache_netfs afs_cache_netfs = {
-	.name			= "afs",
-	.version		= 0,
-};
-
-struct fscache_cookie_def afs_cell_cache_index_def = {
-	.name		= "AFS.cell",
-	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= afs_cell_cache_get_key,
-	.get_aux	= afs_cell_cache_get_aux,
-	.check_aux	= afs_cell_cache_check_aux,
-};
-
-struct fscache_cookie_def afs_vlocation_cache_index_def = {
-	.name			= "AFS.vldb",
-	.type			= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key		= afs_vlocation_cache_get_key,
-	.get_aux		= afs_vlocation_cache_get_aux,
-	.check_aux		= afs_vlocation_cache_check_aux,
-};
-
-struct fscache_cookie_def afs_volume_cache_index_def = {
-	.name		= "AFS.volume",
-	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= afs_volume_cache_get_key,
-};
-
-struct fscache_cookie_def afs_vnode_cache_index_def = {
-	.name			= "AFS.vnode",
-	.type			= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key		= afs_vnode_cache_get_key,
-	.get_attr		= afs_vnode_cache_get_attr,
-	.get_aux		= afs_vnode_cache_get_aux,
-	.check_aux		= afs_vnode_cache_check_aux,
-	.now_uncached		= afs_vnode_cache_now_uncached,
-};
-
-/*
- * set the key for the index entry
- */
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
-				       void *buffer, uint16_t bufmax)
-{
-	const struct afs_cell *cell = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("%p,%p,%u", cell, buffer, bufmax);
-
-	klen = strlen(cell->name);
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, cell->name, klen);
-	return klen;
-}
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
-				       void *buffer, uint16_t bufmax)
-{
-	const struct afs_cell *cell = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("%p,%p,%u", cell, buffer, bufmax);
-
-	dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
-	dlen = min(dlen, bufmax);
-	dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
-
-	memcpy(buffer, cell->vl_addrs, dlen);
-	return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
-						      const void *buffer,
-						      uint16_t buflen)
-{
-	_leave(" = OKAY");
-	return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
-	klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, vlocation->vldb.name, klen);
-
-	_leave(" = %u", klen);
-	return klen;
-}
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
-	dlen = sizeof(struct afs_cache_vlocation);
-	dlen -= offsetof(struct afs_cache_vlocation, nservers);
-	if (dlen > bufmax)
-		return 0;
-
-	memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
-
-	_leave(" = %u", dlen);
-	return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static
-enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
-						    const void *buffer,
-						    uint16_t buflen)
-{
-	const struct afs_cache_vlocation *cvldb;
-	struct afs_vlocation *vlocation = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
-
-	/* check the size of the data is what we're expecting */
-	dlen = sizeof(struct afs_cache_vlocation);
-	dlen -= offsetof(struct afs_cache_vlocation, nservers);
-	if (dlen != buflen)
-		return FSCACHE_CHECKAUX_OBSOLETE;
-
-	cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
-
-	/* if what's on disk is more valid than what's in memory, then use the
-	 * VL record from the cache */
-	if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
-		memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
-		vlocation->valid = 1;
-		_leave(" = SUCCESS [c->m]");
-		return FSCACHE_CHECKAUX_OKAY;
-	}
-
-	/* need to update the cache if the cached info differs */
-	if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
-		/* delete if the volume IDs for this name differ */
-		if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
-			   sizeof(cvldb->vid)) != 0
-		    ) {
-			_leave(" = OBSOLETE");
-			return FSCACHE_CHECKAUX_OBSOLETE;
-		}
-
-		_leave(" = UPDATE");
-		return FSCACHE_CHECKAUX_NEEDS_UPDATE;
-	}
-
-	_leave(" = OKAY");
-	return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the volume index entry
- */
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
-{
-	const struct afs_volume *volume = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("{%u},%p,%u", volume->type, buffer, bufmax);
-
-	klen = sizeof(volume->type);
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, &volume->type, sizeof(volume->type));
-
-	_leave(" = %u", klen);
-	return klen;
-
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("{%x,%x,%llx},%p,%u",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
-	       buffer, bufmax);
-
-	klen = sizeof(vnode->fid.vnode);
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
-
-	_leave(" = %u", klen);
-	return klen;
-}
-
-/*
- * provide updated file attributes
- */
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
-				     uint64_t *size)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-
-	_enter("{%x,%x,%llx},",
-	       vnode->fid.vnode, vnode->fid.unique,
-	       vnode->status.data_version);
-
-	*size = vnode->status.size;
-}
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%x,%x,%Lx},%p,%u",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
-	       buffer, bufmax);
-
-	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
-	if (dlen > bufmax)
-		return 0;
-
-	memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
-	buffer += sizeof(vnode->fid.unique);
-	memcpy(buffer, &vnode->status.data_version,
-	       sizeof(vnode->status.data_version));
-
-	_leave(" = %u", dlen);
-	return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
-						       const void *buffer,
-						       uint16_t buflen)
-{
-	struct afs_vnode *vnode = cookie_netfs_data;
-	uint16_t dlen;
-
-	_enter("{%x,%x,%llx},%p,%u",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
-	       buffer, buflen);
-
-	/* check the size of the data is what we're expecting */
-	dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
-	if (dlen != buflen) {
-		_leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
-		return FSCACHE_CHECKAUX_OBSOLETE;
-	}
-
-	if (memcmp(buffer,
-		   &vnode->fid.unique,
-		   sizeof(vnode->fid.unique)
-		   ) != 0) {
-		unsigned unique;
-
-		memcpy(&unique, buffer, sizeof(unique));
-
-		_leave(" = OBSOLETE [uniq %x != %x]",
-		       unique, vnode->fid.unique);
-		return FSCACHE_CHECKAUX_OBSOLETE;
-	}
-
-	if (memcmp(buffer + sizeof(vnode->fid.unique),
-		   &vnode->status.data_version,
-		   sizeof(vnode->status.data_version)
-		   ) != 0) {
-		afs_dataversion_t version;
-
-		memcpy(&version, buffer + sizeof(vnode->fid.unique),
-		       sizeof(version));
-
-		_leave(" = OBSOLETE [vers %llx != %llx]",
-		       version, vnode->status.data_version);
-		return FSCACHE_CHECKAUX_OBSOLETE;
-	}
-
-	_leave(" = SUCCESS");
-	return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*
- * indication the cookie is no longer uncached
- * - this function is called when the backing store currently caching a cookie
- *   is removed
- * - the netfs should use this to clean up any markers indicating cached pages
- * - this is mandatory for any object that may have data
- */
-static void afs_vnode_cache_now_uncached(void *cookie_netfs_data)
-{
-	struct afs_vnode *vnode = cookie_netfs_data;
-	struct pagevec pvec;
-	pgoff_t first;
-	int loop, nr_pages;
-
-	_enter("{%x,%x,%Lx}",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version);
-
-	pagevec_init(&pvec, 0);
-	first = 0;
-
-	for (;;) {
-		/* grab a bunch of pages to clean */
-		nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping,
-					  first,
-					  PAGEVEC_SIZE - pagevec_count(&pvec));
-		if (!nr_pages)
-			break;
-
-		for (loop = 0; loop < nr_pages; loop++)
-			ClearPageFsCache(pvec.pages[loop]);
-
-		first = pvec.pages[nr_pages - 1]->index + 1;
-
-		pvec.nr = nr_pages;
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	_leave("");
-}
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7ef637d7f3a5..894d2bad6b6c 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -20,456 +20,246 @@
 #include <linux/sched.h>
 #include "internal.h"
 
-#if 0
-unsigned afs_vnode_update_timeout = 10;
-#endif  /*  0  */
-
-#define afs_breakring_space(server) \
-	CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail,	\
-		   ARRAY_SIZE((server)->cb_break))
-
-//static void afs_callback_updater(struct work_struct *);
-
-static struct workqueue_struct *afs_callback_update_worker;
-
 /*
- * allow the fileserver to request callback state (re-)initialisation
+ * Handle invalidation of an mmap'd file.  We invalidate all the PTEs referring
+ * to the pages in this file's pagecache, forcing the kernel to go through
+ * ->fault() or ->page_mkwrite() - at which point we can handle invalidation
+ * more fully.
  */
-void afs_init_callback_state(struct afs_server *server)
+void afs_invalidate_mmap_work(struct work_struct *work)
 {
-	struct afs_vnode *vnode;
+	struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work);
+
+	unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
+}
 
-	_enter("{%p}", server);
+static void afs_volume_init_callback(struct afs_volume *volume)
+{
+	struct afs_vnode *vnode;
 
-	spin_lock(&server->cb_lock);
+	down_read(&volume->open_mmaps_lock);
 
-	/* kill all the promises on record from this server */
-	while (!RB_EMPTY_ROOT(&server->cb_promises)) {
-		vnode = rb_entry(server->cb_promises.rb_node,
-				 struct afs_vnode, cb_promise);
-		_debug("UNPROMISE { vid=%x:%u uq=%u}",
-		       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
+	list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
+		if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
+			afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
+			queue_work(system_dfl_wq, &vnode->cb_work);
+		}
 	}
 
-	spin_unlock(&server->cb_lock);
-	_leave("");
+	up_read(&volume->open_mmaps_lock);
 }
 
 /*
- * handle the data invalidation side of a callback being broken
+ * Allow the fileserver to request callback state (re-)initialisation.
+ * Unfortunately, UUIDs are not guaranteed unique.
  */
-void afs_broken_callback_work(struct work_struct *work)
+void afs_init_callback_state(struct afs_server *server)
 {
-	struct afs_vnode *vnode =
-		container_of(work, struct afs_vnode, cb_broken_work);
-
-	_enter("");
-
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-		return;
+	struct afs_server_entry *se;
 
-	/* we're only interested in dealing with a broken callback on *this*
-	 * vnode and only if no-one else has dealt with it yet */
-	if (!mutex_trylock(&vnode->validate_lock))
-		return; /* someone else is dealing with it */
+	down_read(&server->cell->vs_lock);
 
-	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		if (S_ISDIR(vnode->vfs_inode.i_mode))
-			afs_clear_permits(vnode);
-
-		if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
-			goto out;
-
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			goto out;
-
-		/* if the vnode's data version number changed then its contents
-		 * are different */
-		if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
-			afs_zap_data(vnode);
+	list_for_each_entry(se, &server->volumes, slink) {
+		se->cb_expires_at = AFS_NO_CB_PROMISE;
+		se->volume->cb_expires_at = AFS_NO_CB_PROMISE;
+		trace_afs_cb_v_break(se->volume->vid, atomic_read(&se->volume->cb_v_break),
+				     afs_cb_break_for_s_reinit);
+		if (!list_empty(&se->volume->open_mmaps))
+			afs_volume_init_callback(se->volume);
 	}
 
-out:
-	mutex_unlock(&vnode->validate_lock);
-
-	/* avoid the potential race whereby the mutex_trylock() in this
-	 * function happens again between the clear_bit() and the
-	 * mutex_unlock() */
-	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		_debug("requeue");
-		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
-	}
-	_leave("");
+	up_read(&server->cell->vs_lock);
 }
 
 /*
  * actually break a callback
  */
-static void afs_break_callback(struct afs_server *server,
-			       struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
 {
 	_enter("");
 
-	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-
-	if (vnode->cb_promised) {
-		spin_lock(&vnode->lock);
-
-		_debug("break callback");
-
-		spin_lock(&server->cb_lock);
-		if (vnode->cb_promised) {
-			rb_erase(&vnode->cb_promise, &server->cb_promises);
-			vnode->cb_promised = false;
-		}
-		spin_unlock(&server->cb_lock);
+	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+	if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
+		vnode->cb_break++;
+		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
+		afs_clear_permits(vnode);
 
-		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
-		if (list_empty(&vnode->granted_locks) &&
-		    !list_empty(&vnode->pending_locks))
+		if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
 			afs_lock_may_be_available(vnode);
-		spin_unlock(&vnode->lock);
-	}
-}
 
-/*
- * allow the fileserver to explicitly break one callback
- * - happens when
- *   - the backing file is changed
- *   - a lock is released
- */
-static void afs_break_one_callback(struct afs_server *server,
-				   struct afs_fid *fid)
-{
-	struct afs_vnode *vnode;
-	struct rb_node *p;
-
-	_debug("find");
-	spin_lock(&server->fs_lock);
-	p = server->fs_vnodes.rb_node;
-	while (p) {
-		vnode = rb_entry(p, struct afs_vnode, server_rb);
-		if (fid->vid < vnode->fid.vid)
-			p = p->rb_left;
-		else if (fid->vid > vnode->fid.vid)
-			p = p->rb_right;
-		else if (fid->vnode < vnode->fid.vnode)
-			p = p->rb_left;
-		else if (fid->vnode > vnode->fid.vnode)
-			p = p->rb_right;
-		else if (fid->unique < vnode->fid.unique)
-			p = p->rb_left;
-		else if (fid->unique > vnode->fid.unique)
-			p = p->rb_right;
-		else
-			goto found;
-	}
+		if (reason != afs_cb_break_for_deleted &&
+		    vnode->status.type == AFS_FTYPE_FILE &&
+		    atomic_read(&vnode->cb_nr_mmap))
+			queue_work(system_dfl_wq, &vnode->cb_work);
 
-	/* not found so we just ignore it (it may have moved to another
-	 * server) */
-not_available:
-	_debug("not avail");
-	spin_unlock(&server->fs_lock);
-	_leave("");
-	return;
-
-found:
-	_debug("found");
-	ASSERTCMP(server, ==, vnode->server);
-
-	if (!igrab(AFS_VNODE_TO_I(vnode)))
-		goto not_available;
-	spin_unlock(&server->fs_lock);
-
-	afs_break_callback(server, vnode);
-	iput(&vnode->vfs_inode);
-	_leave("");
-}
-
-/*
- * allow the fileserver to break callback promises
- */
-void afs_break_callbacks(struct afs_server *server, size_t count,
-			 struct afs_callback callbacks[])
-{
-	_enter("%p,%zu,", server, count);
-
-	ASSERT(server != NULL);
-	ASSERTCMP(count, <=, AFSCBMAX);
-
-	for (; count > 0; callbacks++, count--) {
-		_debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
-		       callbacks->fid.vid,
-		       callbacks->fid.vnode,
-		       callbacks->fid.unique,
-		       callbacks->version,
-		       callbacks->expiry,
-		       callbacks->type
-		       );
-		afs_break_one_callback(server, &callbacks->fid);
+		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+	} else {
+		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
 	}
-
-	_leave("");
-	return;
 }
 
-/*
- * record the callback for breaking
- * - the caller must hold server->cb_lock
- */
-static void afs_do_give_up_callback(struct afs_server *server,
-				    struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
 {
-	struct afs_callback *cb;
-
-	_enter("%p,%p", server, vnode);
-
-	cb = &server->cb_break[server->cb_break_head];
-	cb->fid		= vnode->fid;
-	cb->version	= vnode->cb_version;
-	cb->expiry	= vnode->cb_expiry;
-	cb->type	= vnode->cb_type;
-	smp_wmb();
-	server->cb_break_head =
-		(server->cb_break_head + 1) &
-		(ARRAY_SIZE(server->cb_break) - 1);
-
-	/* defer the breaking of callbacks to try and collect as many as
-	 * possible to ship in one operation */
-	switch (atomic_inc_return(&server->cb_break_n)) {
-	case 1 ... AFSCBMAX - 1:
-		queue_delayed_work(afs_callback_update_worker,
-				   &server->cb_break_work, HZ * 2);
-		break;
-	case AFSCBMAX:
-		afs_flush_callback_breaks(server);
-		break;
-	default:
-		break;
-	}
-
-	ASSERT(server->cb_promises.rb_node != NULL);
-	rb_erase(&vnode->cb_promise, &server->cb_promises);
-	vnode->cb_promised = false;
-	_leave("");
+	write_seqlock(&vnode->cb_lock);
+	__afs_break_callback(vnode, reason);
+	write_sequnlock(&vnode->cb_lock);
 }
 
 /*
- * discard the callback on a deleted item
+ * Look up a volume by volume ID under RCU conditions.
  */
-void afs_discard_callback_on_delete(struct afs_vnode *vnode)
+static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
+						afs_volid_t vid)
 {
-	struct afs_server *server = vnode->server;
+	struct afs_volume *volume = NULL;
+	struct rb_node *p;
+	int seq = 1;
 
-	_enter("%d", vnode->cb_promised);
+	for (;;) {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		seq++; /* 2 on the 1st/lockless path, otherwise odd */
+		read_seqbegin_or_lock(&cell->volume_lock, &seq);
+
+		p = rcu_dereference_raw(cell->volumes.rb_node);
+		while (p) {
+			volume = rb_entry(p, struct afs_volume, cell_node);
+
+			if (volume->vid < vid)
+				p = rcu_dereference_raw(p->rb_left);
+			else if (volume->vid > vid)
+				p = rcu_dereference_raw(p->rb_right);
+			else
+				break;
+			volume = NULL;
+		}
 
-	if (!vnode->cb_promised) {
-		_leave(" [not promised]");
-		return;
+		if (volume && afs_try_get_volume(volume, afs_volume_trace_get_callback))
+			break;
+		if (!need_seqretry(&cell->volume_lock, seq))
+			break;
+		seq |= 1; /* Want a lock next time */
 	}
 
-	ASSERT(server != NULL);
-
-	spin_lock(&server->cb_lock);
-	if (vnode->cb_promised) {
-		ASSERT(server->cb_promises.rb_node != NULL);
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
-	}
-	spin_unlock(&server->cb_lock);
-	_leave("");
+	done_seqretry(&cell->volume_lock, seq);
+	return volume;
 }
 
 /*
- * give up the callback registered for a vnode on the file server when the
- * inode is being cleared
+ * Allow the fileserver to break callbacks at the volume-level.  This is
+ * typically done when, for example, a R/W volume is snapshotted to a R/O
+ * volume (the only way to change an R/O volume).  It may also, however, happen
+ * when a volserver takes control of a volume (offlining it, moving it, etc.).
+ *
+ * Every file in that volume will need to be reevaluated.
  */
-void afs_give_up_callback(struct afs_vnode *vnode)
+static void afs_break_volume_callback(struct afs_server *server,
+				      struct afs_volume *volume)
+	__releases(RCU)
 {
-	struct afs_server *server = vnode->server;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	_enter("%d", vnode->cb_promised);
-
-	_debug("GIVE UP INODE %p", &vnode->vfs_inode);
+	struct afs_server_list *slist = rcu_dereference(volume->servers);
+	unsigned int i, cb_v_break;
 
-	if (!vnode->cb_promised) {
-		_leave(" [not promised]");
-		return;
-	}
+	write_lock(&volume->cb_v_break_lock);
 
-	ASSERT(server != NULL);
+	for (i = 0; i < slist->nr_servers; i++)
+		if (slist->servers[i].server == server)
+			slist->servers[i].cb_expires_at = AFS_NO_CB_PROMISE;
+	volume->cb_expires_at = AFS_NO_CB_PROMISE;
 
-	spin_lock(&server->cb_lock);
-	if (vnode->cb_promised && afs_breakring_space(server) == 0) {
-		add_wait_queue(&server->cb_break_waitq, &myself);
-		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			if (!vnode->cb_promised ||
-			    afs_breakring_space(server) != 0)
-				break;
-			spin_unlock(&server->cb_lock);
-			schedule();
-			spin_lock(&server->cb_lock);
-		}
-		remove_wait_queue(&server->cb_break_waitq, &myself);
-		__set_current_state(TASK_RUNNING);
-	}
+	cb_v_break = atomic_inc_return_release(&volume->cb_v_break);
+	trace_afs_cb_v_break(volume->vid, cb_v_break, afs_cb_break_for_volume_callback);
 
-	/* of course, it's always possible for the server to break this vnode's
-	 * callback first... */
-	if (vnode->cb_promised)
-		afs_do_give_up_callback(server, vnode);
+	write_unlock(&volume->cb_v_break_lock);
+	rcu_read_unlock();
 
-	spin_unlock(&server->cb_lock);
-	_leave("");
+	if (!list_empty(&volume->open_mmaps))
+		afs_volume_init_callback(volume);
 }
 
 /*
- * dispatch a deferred give up callbacks operation
+ * allow the fileserver to explicitly break one callback
+ * - happens when
+ *   - the backing file is changed
+ *   - a lock is released
  */
-void afs_dispatch_give_up_callbacks(struct work_struct *work)
+static void afs_break_one_callback(struct afs_server *server,
+				   struct afs_volume *volume,
+				   struct afs_fid *fid)
 {
-	struct afs_server *server =
-		container_of(work, struct afs_server, cb_break_work.work);
-
-	_enter("");
+	struct super_block *sb;
+	struct afs_vnode *vnode;
+	struct inode *inode;
 
-	/* tell the fileserver to discard the callback promises it has
-	 * - in the event of ENOMEM or some other error, we just forget that we
-	 *   had callbacks entirely, and the server will call us later to break
-	 *   them
+	/* See if we can find a matching inode - even an I_NEW inode needs to
+	 * be marked as it can have its callback broken before we finish
+	 * setting up the local inode.
 	 */
-	afs_fs_give_up_callbacks(server, &afs_async_call);
-}
+	sb = rcu_dereference(volume->sb);
+	if (!sb)
+		return;
 
-/*
- * flush the outstanding callback breaks on a server
- */
-void afs_flush_callback_breaks(struct afs_server *server)
-{
-	mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
+	inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid);
+	if (inode) {
+		vnode = AFS_FS_I(inode);
+		afs_break_callback(vnode, afs_cb_break_for_callback);
+	} else {
+		trace_afs_cb_miss(fid, afs_cb_break_for_callback);
+	}
 }
 
-#if 0
-/*
- * update a bunch of callbacks
- */
-static void afs_callback_updater(struct work_struct *work)
+static void afs_break_some_callbacks(struct afs_server *server,
+				     struct afs_callback_break *cbb,
+				     size_t *_count)
 {
-	struct afs_server *server;
-	struct afs_vnode *vnode, *xvnode;
-	time_t now;
-	long timeout;
-	int ret;
-
-	server = container_of(work, struct afs_server, updater);
-
-	_enter("");
-
-	now = get_seconds();
+	struct afs_callback_break *residue = cbb;
+	struct afs_volume *volume;
+	afs_volid_t vid = cbb->fid.vid;
+	size_t i;
+
+	rcu_read_lock();
+	volume = afs_lookup_volume_rcu(server->cell, vid);
+	if (cbb->fid.vnode == 0 && cbb->fid.unique == 0) {
+		afs_break_volume_callback(server, volume);
+		*_count -= 1;
+		if (*_count)
+			memmove(cbb, cbb + 1, sizeof(*cbb) * *_count);
+	} else {
+		/* TODO: Find all matching volumes if we couldn't match the server and
+		 * break them anyway.
+		 */
 
-	/* find the first vnode to update */
-	spin_lock(&server->cb_lock);
-	for (;;) {
-		if (RB_EMPTY_ROOT(&server->cb_promises)) {
-			spin_unlock(&server->cb_lock);
-			_leave(" [nothing]");
-			return;
+		for (i = *_count; i > 0; cbb++, i--) {
+			if (cbb->fid.vid == vid) {
+				_debug("- Fid { vl=%08llx n=%llu u=%u }",
+				       cbb->fid.vid,
+				       cbb->fid.vnode,
+				       cbb->fid.unique);
+				--*_count;
+				if (volume)
+					afs_break_one_callback(server, volume, &cbb->fid);
+			} else {
+				*residue++ = *cbb;
+			}
 		}
-
-		vnode = rb_entry(rb_first(&server->cb_promises),
-				 struct afs_vnode, cb_promise);
-		if (atomic_read(&vnode->usage) > 0)
-			break;
-		rb_erase(&vnode->cb_promise, &server->cb_promises);
-		vnode->cb_promised = false;
-	}
-
-	timeout = vnode->update_at - now;
-	if (timeout > 0) {
-		queue_delayed_work(afs_vnode_update_worker,
-				   &afs_vnode_update, timeout * HZ);
-		spin_unlock(&server->cb_lock);
-		_leave(" [nothing]");
-		return;
-	}
-
-	list_del_init(&vnode->update);
-	atomic_inc(&vnode->usage);
-	spin_unlock(&server->cb_lock);
-
-	/* we can now perform the update */
-	_debug("update %s", vnode->vldb.name);
-	vnode->state = AFS_VL_UPDATING;
-	vnode->upd_rej_cnt = 0;
-	vnode->upd_busy_cnt = 0;
-
-	ret = afs_vnode_update_record(vl, &vldb);
-	switch (ret) {
-	case 0:
-		afs_vnode_apply_update(vl, &vldb);
-		vnode->state = AFS_VL_UPDATING;
-		break;
-	case -ENOMEDIUM:
-		vnode->state = AFS_VL_VOLUME_DELETED;
-		break;
-	default:
-		vnode->state = AFS_VL_UNCERTAIN;
-		break;
+		rcu_read_unlock();
 	}
 
-	/* and then reschedule */
-	_debug("reschedule");
-	vnode->update_at = get_seconds() + afs_vnode_update_timeout;
-
-	spin_lock(&server->cb_lock);
-
-	if (!list_empty(&server->cb_promises)) {
-		/* next update in 10 minutes, but wait at least 1 second more
-		 * than the newest record already queued so that we don't spam
-		 * the VL server suddenly with lots of requests
-		 */
-		xvnode = list_entry(server->cb_promises.prev,
-				    struct afs_vnode, update);
-		if (vnode->update_at <= xvnode->update_at)
-			vnode->update_at = xvnode->update_at + 1;
-		xvnode = list_entry(server->cb_promises.next,
-				    struct afs_vnode, update);
-		timeout = xvnode->update_at - now;
-		if (timeout < 0)
-			timeout = 0;
-	} else {
-		timeout = afs_vnode_update_timeout;
-	}
-
-	list_add_tail(&vnode->update, &server->cb_promises);
-
-	_debug("timeout %ld", timeout);
-	queue_delayed_work(afs_vnode_update_worker,
-			   &afs_vnode_update, timeout * HZ);
-	spin_unlock(&server->cb_lock);
-	afs_put_vnode(vl);
+	afs_put_volume(volume, afs_volume_trace_put_callback);
 }
-#endif
 
 /*
- * initialise the callback update process
+ * allow the fileserver to break callback promises
  */
-int __init afs_callback_update_init(void)
+void afs_break_callbacks(struct afs_server *server, size_t count,
+			 struct afs_callback_break *callbacks)
 {
-	afs_callback_update_worker =
-		create_singlethread_workqueue("kafs_callbackd");
-	return afs_callback_update_worker ? 0 : -ENOMEM;
-}
+	_enter("%p,%zu,", server, count);
 
-/*
- * shut down the callback update process
- */
-void afs_callback_update_kill(void)
-{
-	destroy_workqueue(afs_callback_update_worker);
+	ASSERT(server != NULL);
+
+	while (count > 0)
+		afs_break_some_callbacks(server, callbacks, &count);
 }
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 3c090b7555ea..71c10a05cebe 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,221 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS cell and server record management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/key.h>
 #include <linux/ctype.h>
 #include <linux/dns_resolver.h>
 #include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
-DECLARE_RWSEM(afs_proc_cells_sem);
-LIST_HEAD(afs_proc_cells);
+static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
+static atomic_t cell_debug_id;
+
+static void afs_cell_timer(struct timer_list *timer);
+static void afs_destroy_cell_work(struct work_struct *work);
+static void afs_manage_cell_work(struct work_struct *work);
+
+static void afs_dec_cells_outstanding(struct afs_net *net)
+{
+	if (atomic_dec_and_test(&net->cells_outstanding))
+		wake_up_var(&net->cells_outstanding);
+}
 
-static LIST_HEAD(afs_cells);
-static DEFINE_RWLOCK(afs_cells_lock);
-static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
-static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
-static struct afs_cell *afs_cell_root;
+static void afs_set_cell_state(struct afs_cell *cell, enum afs_cell_state state)
+{
+	smp_store_release(&cell->state, state); /* Commit cell changes before state */
+	smp_wmb(); /* Set cell state before task state */
+	wake_up_var(&cell->state);
+}
 
 /*
- * allocate a cell record and fill in its name, VL server address list and
- * allocate an anonymous key
+ * Look up and get an activation reference on a cell record.  The caller must
+ * hold net->cells_lock at least read-locked.
  */
-static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen,
-				       char *vllist)
+static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
+					     const char *name, unsigned int namesz,
+					     enum afs_cell_trace reason)
+{
+	struct afs_cell *cell = NULL;
+	struct rb_node *p;
+	int n;
+
+	_enter("%*.*s", namesz, namesz, name);
+
+	if (name && namesz == 0)
+		return ERR_PTR(-EINVAL);
+	if (namesz > AFS_MAXCELLNAME)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	if (!name) {
+		cell = rcu_dereference_protected(net->ws_cell,
+						 lockdep_is_held(&net->cells_lock));
+		if (!cell)
+			return ERR_PTR(-EDESTADDRREQ);
+		goto found;
+	}
+
+	p = net->cells.rb_node;
+	while (p) {
+		cell = rb_entry(p, struct afs_cell, net_node);
+
+		n = strncasecmp(cell->name, name,
+				min_t(size_t, cell->name_len, namesz));
+		if (n == 0)
+			n = cell->name_len - namesz;
+		if (n < 0)
+			p = p->rb_left;
+		else if (n > 0)
+			p = p->rb_right;
+		else
+			goto found;
+	}
+
+	return ERR_PTR(-ENOENT);
+
+found:
+	return afs_use_cell(cell, reason);
+}
+
+/*
+ * Look up and get an activation reference on a cell record.
+ */
+struct afs_cell *afs_find_cell(struct afs_net *net,
+			       const char *name, unsigned int namesz,
+			       enum afs_cell_trace reason)
 {
 	struct afs_cell *cell;
-	struct key *key;
-	char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
-	char  *dvllist = NULL, *_vllist = NULL;
-	char  delimiter = ':';
-	int ret;
 
-	_enter("%*.*s,%s", namelen, namelen, name ?: "", vllist);
+	down_read(&net->cells_lock);
+	cell = afs_find_cell_locked(net, name, namesz, reason);
+	up_read(&net->cells_lock);
+	return cell;
+}
 
-	BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
+/*
+ * Set up a cell record and fill in its name, VL server address list and
+ * allocate an anonymous key
+ */
+static struct afs_cell *afs_alloc_cell(struct afs_net *net,
+				       const char *name, unsigned int namelen,
+				       const char *addresses)
+{
+	struct afs_vlserver_list *vllist = NULL;
+	struct afs_cell *cell;
+	int i, ret;
 
+	ASSERT(name);
+	if (namelen == 0)
+		return ERR_PTR(-EINVAL);
 	if (namelen > AFS_MAXCELLNAME) {
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 
-	/* allocate and initialise a cell record */
-	cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
+	/* Prohibit cell names that contain unprintable chars, '/' and '@' or
+	 * that begin with a dot.  This also precludes "@cell".
+	 */
+	if (name[0] == '.')
+		return ERR_PTR(-EINVAL);
+	for (i = 0; i < namelen; i++) {
+		char ch = name[i];
+		if (!isprint(ch) || ch == '/' || ch == '@')
+			return ERR_PTR(-EINVAL);
+	}
+
+	_enter("%*.*s,%s", namelen, namelen, name, addresses);
+
+	cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
 	if (!cell) {
 		_leave(" = -ENOMEM");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memcpy(cell->name, name, namelen);
-	cell->name[namelen] = 0;
-
-	atomic_set(&cell->usage, 1);
-	INIT_LIST_HEAD(&cell->link);
-	rwlock_init(&cell->servers_lock);
-	INIT_LIST_HEAD(&cell->servers);
-	init_rwsem(&cell->vl_sem);
-	INIT_LIST_HEAD(&cell->vl_list);
-	spin_lock_init(&cell->vl_lock);
-
-	/* if the ip address is invalid, try dns query */
-	if (!vllist || strlen(vllist) < 7) {
-		ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
-		if (ret < 0) {
-			if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY)
-				/* translate these errors into something
-				 * userspace might understand */
-				ret = -EDESTADDRREQ;
-			_leave(" = %d", ret);
-			return ERR_PTR(ret);
-		}
-		_vllist = dvllist;
+	/* Allocate the cell name and the key name in one go. */
+	cell->name = kmalloc(1 + namelen + 1 +
+			     4 + namelen + 1, GFP_KERNEL);
+	if (!cell->name) {
+		kfree(cell);
+		return ERR_PTR(-ENOMEM);
+	}
 
-		/* change the delimiter for user-space reply */
-		delimiter = ',';
+	cell->name[0] = '.';
+	cell->name++;
+	cell->name_len = namelen;
+	for (i = 0; i < namelen; i++)
+		cell->name[i] = tolower(name[i]);
+	cell->name[i++] = 0;
+
+	cell->key_desc = cell->name + i;
+	memcpy(cell->key_desc, "afs@", 4);
+	memcpy(cell->key_desc + 4, cell->name, cell->name_len + 1);
+
+	cell->net = net;
+	refcount_set(&cell->ref, 1);
+	atomic_set(&cell->active, 0);
+	INIT_WORK(&cell->destroyer, afs_destroy_cell_work);
+	INIT_WORK(&cell->manager, afs_manage_cell_work);
+	timer_setup(&cell->management_timer, afs_cell_timer, 0);
+	init_rwsem(&cell->vs_lock);
+	cell->volumes = RB_ROOT;
+	INIT_HLIST_HEAD(&cell->proc_volumes);
+	seqlock_init(&cell->volume_lock);
+	cell->fs_servers = RB_ROOT;
+	init_rwsem(&cell->fs_lock);
+	rwlock_init(&cell->vl_servers_lock);
+	cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
+
+	/* Provide a VL server list, filling it in if we were given a list of
+	 * addresses to use.
+	 */
+	if (addresses) {
+		vllist = afs_parse_text_addrs(net,
+					      addresses, strlen(addresses), ':',
+					      VL_SERVICE, AFS_VL_PORT);
+		if (IS_ERR(vllist)) {
+			ret = PTR_ERR(vllist);
+			vllist = NULL;
+			goto parse_failed;
+		}
 
+		vllist->source = DNS_RECORD_FROM_CONFIG;
+		vllist->status = DNS_LOOKUP_NOT_DONE;
+		cell->dns_expiry = TIME64_MAX;
 	} else {
-		_vllist = vllist;
+		ret = -ENOMEM;
+		vllist = afs_alloc_vlserver_list(0);
+		if (!vllist)
+			goto error;
+		vllist->source = DNS_RECORD_UNAVAILABLE;
+		vllist->status = DNS_LOOKUP_NOT_DONE;
+		cell->dns_expiry = ktime_get_real_seconds();
 	}
 
-	/* fill in the VL server list from the rest of the string */
-	do {
-		unsigned a, b, c, d;
-
-		next = strchr(_vllist, delimiter);
-		if (next)
-			*next++ = 0;
-
-		if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
-			goto bad_address;
-
-		if (a > 255 || b > 255 || c > 255 || d > 255)
-			goto bad_address;
-
-		cell->vl_addrs[cell->vl_naddrs++].s_addr =
-			htonl((a << 24) | (b << 16) | (c << 8) | d);
+	rcu_assign_pointer(cell->vl_servers, vllist);
 
-	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
-
-	/* create a key to represent an anonymous user */
-	memcpy(keyname, "afs@", 4);
-	dp = keyname + 4;
-	cp = cell->name;
-	do {
-		*dp++ = toupper(*cp);
-	} while (*cp++);
-
-	key = rxrpc_get_null_key(keyname);
-	if (IS_ERR(key)) {
-		_debug("no key");
-		ret = PTR_ERR(key);
+	cell->dns_source = vllist->source;
+	cell->dns_status = vllist->status;
+	smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
+	atomic_inc(&net->cells_outstanding);
+	ret = idr_alloc_cyclic(&net->cells_dyn_ino, cell,
+			       2, INT_MAX / 2, GFP_KERNEL);
+	if (ret < 0)
 		goto error;
-	}
-	cell->anonymous_key = key;
+	cell->dynroot_ino = ret;
+	cell->debug_id = atomic_inc_return(&cell_debug_id);
 
-	_debug("anon key %p{%x}",
-	       cell->anonymous_key, key_serial(cell->anonymous_key));
+	trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc);
 
 	_leave(" = %p", cell);
 	return cell;
 
-bad_address:
-	printk(KERN_ERR "kAFS: bad VL server IP address\n");
-	ret = -EINVAL;
+parse_failed:
+	if (ret == -EINVAL)
+		printk(KERN_ERR "kAFS: bad VL server IP address\n");
 error:
-	key_put(cell->anonymous_key);
-	kfree(dvllist);
+	afs_put_vlserverlist(cell->net, vllist);
+	kfree(cell->name - 1);
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
 
 /*
- * afs_cell_crate() - create a cell record
- * @name:	is the name of the cell.
- * @namsesz:	is the strlen of the cell name.
- * @vllist:	is a colon separated list of IP addresses in "a.b.c.d" format.
- * @retref:	is T to return the cell reference when the cell exists.
+ * afs_lookup_cell - Look up or create a cell record.
+ * @net:	The network namespace
+ * @name:	The name of the cell.
+ * @namesz:	The strlen of the cell name.
+ * @vllist:	A colon/comma separated list of numeric IP addresses or NULL.
+ * @reason:	The reason we're doing the lookup
+ * @trace:	The reason to be logged if the lookup is successful.
+ *
+ * Look up a cell record by name and query the DNS for VL server addresses if
+ * needed.  Note that that actual DNS query is punted off to the manager thread
+ * so that this function can return immediately if interrupted whilst allowing
+ * cell records to be shared even if not yet fully constructed.
  */
-struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
-				 char *vllist, bool retref)
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+				 const char *name, unsigned int namesz,
+				 const char *vllist,
+				 enum afs_lookup_cell_for reason,
+				 enum afs_cell_trace trace)
 {
-	struct afs_cell *cell;
-	int ret;
-
-	_enter("%*.*s,%s", namesz, namesz, name ?: "", vllist);
-
-	down_write(&afs_cells_sem);
-	read_lock(&afs_cells_lock);
-	list_for_each_entry(cell, &afs_cells, link) {
-		if (strncasecmp(cell->name, name, namesz) == 0)
-			goto duplicate_name;
+	struct afs_cell *cell, *candidate, *cursor;
+	struct rb_node *parent, **pp;
+	enum afs_cell_state state;
+	int ret, n;
+
+	_enter("%s,%s,%u", name, vllist, reason);
+
+	if (reason != AFS_LOOKUP_CELL_PRELOAD) {
+		cell = afs_find_cell(net, name, namesz, trace);
+		if (!IS_ERR(cell)) {
+			if (reason == AFS_LOOKUP_CELL_DYNROOT)
+				goto no_wait;
+			if (cell->state == AFS_CELL_SETTING_UP ||
+			    cell->state == AFS_CELL_UNLOOKED)
+				goto lookup_cell;
+			goto wait_for_cell;
+		}
 	}
-	read_unlock(&afs_cells_lock);
 
-	cell = afs_cell_alloc(name, namesz, vllist);
-	if (IS_ERR(cell)) {
-		_leave(" = %ld", PTR_ERR(cell));
-		up_write(&afs_cells_sem);
-		return cell;
+	/* Assume we're probably going to create a cell and preallocate and
+	 * mostly set up a candidate record.  We can then use this to stash the
+	 * name, the net namespace and VL server addresses.
+	 *
+	 * We also want to do this before we hold any locks as it may involve
+	 * upcalling to userspace to make DNS queries.
+	 */
+	candidate = afs_alloc_cell(net, name, namesz, vllist);
+	if (IS_ERR(candidate)) {
+		_leave(" = %ld", PTR_ERR(candidate));
+		return candidate;
 	}
 
-	/* add a proc directory for this cell */
-	ret = afs_proc_cell_setup(cell);
-	if (ret < 0)
-		goto error;
+	/* Find the insertion point and check to see if someone else added a
+	 * cell whilst we were allocating.
+	 */
+	down_write(&net->cells_lock);
+
+	pp = &net->cells.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		cursor = rb_entry(parent, struct afs_cell, net_node);
+
+		n = strncasecmp(cursor->name, name,
+				min_t(size_t, cursor->name_len, namesz));
+		if (n == 0)
+			n = cursor->name_len - namesz;
+		if (n < 0)
+			pp = &(*pp)->rb_left;
+		else if (n > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto cell_already_exists;
+	}
 
-#ifdef CONFIG_AFS_FSCACHE
-	/* put it up for caching (this never returns an error) */
-	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
-					     &afs_cell_cache_index_def,
-					     cell);
-#endif
+	cell = candidate;
+	candidate = NULL;
+	afs_use_cell(cell, trace);
+	rb_link_node_rcu(&cell->net_node, parent, pp);
+	rb_insert_color(&cell->net_node, &net->cells);
+	up_write(&net->cells_lock);
+
+lookup_cell:
+	if (reason != AFS_LOOKUP_CELL_PRELOAD &&
+	    reason != AFS_LOOKUP_CELL_ROOTCELL) {
+		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+		afs_queue_cell(cell, afs_cell_trace_queue_new);
+	}
 
-	/* add to the cell lists */
-	write_lock(&afs_cells_lock);
-	list_add_tail(&cell->link, &afs_cells);
-	write_unlock(&afs_cells_lock);
+wait_for_cell:
+	state = smp_load_acquire(&cell->state); /* vs error */
+	switch (state) {
+	case AFS_CELL_ACTIVE:
+	case AFS_CELL_DEAD:
+		break;
+	case AFS_CELL_UNLOOKED:
+	default:
+		if (reason == AFS_LOOKUP_CELL_PRELOAD ||
+		    reason == AFS_LOOKUP_CELL_ROOTCELL)
+			break;
+		_debug("wait_for_cell");
+		afs_see_cell(cell, afs_cell_trace_wait);
+		wait_var_event(&cell->state,
+			       ({
+				       state = smp_load_acquire(&cell->state); /* vs error */
+				       state == AFS_CELL_ACTIVE || state == AFS_CELL_DEAD;
+			       }));
+		_debug("waited_for_cell %d %d", cell->state, cell->error);
+	}
 
-	down_write(&afs_proc_cells_sem);
-	list_add_tail(&cell->proc_link, &afs_proc_cells);
-	up_write(&afs_proc_cells_sem);
-	up_write(&afs_cells_sem);
+no_wait:
+	/* Check the state obtained from the wait check. */
+	state = smp_load_acquire(&cell->state); /* vs error */
+	if (state == AFS_CELL_DEAD) {
+		ret = cell->error;
+		goto error;
+	}
+	if (state == AFS_CELL_ACTIVE) {
+		switch (cell->dns_status) {
+		case DNS_LOOKUP_NOT_DONE:
+			if (cell->dns_source == DNS_RECORD_FROM_CONFIG) {
+				ret = 0;
+				break;
+			}
+			fallthrough;
+		default:
+			ret = -EIO;
+			goto error;
+		case DNS_LOOKUP_GOOD:
+		case DNS_LOOKUP_GOOD_WITH_BAD:
+			ret = 0;
+			break;
+		case DNS_LOOKUP_GOT_NOT_FOUND:
+			ret = -ENOENT;
+			goto error;
+		case DNS_LOOKUP_BAD:
+			ret = -EREMOTEIO;
+			goto error;
+		case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+		case DNS_LOOKUP_GOT_TEMP_FAILURE:
+		case DNS_LOOKUP_GOT_NS_FAILURE:
+			ret = -EDESTADDRREQ;
+			goto error;
+		}
+	}
 
-	_leave(" = %p", cell);
+	_leave(" = %p [cell]", cell);
 	return cell;
 
+cell_already_exists:
+	_debug("cell exists");
+	cell = cursor;
+	if (reason == AFS_LOOKUP_CELL_PRELOAD) {
+		ret = -EEXIST;
+	} else {
+		afs_use_cell(cursor, trace);
+		ret = 0;
+	}
+	up_write(&net->cells_lock);
+	if (candidate)
+		afs_put_cell(candidate, afs_cell_trace_put_candidate);
+	if (ret == 0)
+		goto wait_for_cell;
+	goto error_noput;
 error:
-	up_write(&afs_cells_sem);
-	key_put(cell->anonymous_key);
-	kfree(cell);
-	_leave(" = %d", ret);
+	afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_error);
+error_noput:
+	_leave(" = %d [error]", ret);
 	return ERR_PTR(ret);
-
-duplicate_name:
-	if (retref && !IS_ERR(cell))
-		afs_get_cell(cell);
-
-	read_unlock(&afs_cells_lock);
-	up_write(&afs_cells_sem);
-
-	if (retref) {
-		_leave(" = %p", cell);
-		return cell;
-	}
-
-	_leave(" = -EEXIST");
-	return ERR_PTR(-EEXIST);
 }
 
 /*
@@ -223,10 +405,11 @@ duplicate_name:
  * - can be called with a module parameter string
  * - can be called from a write to /proc/fs/afs/rootcell
  */
-int afs_cell_init(char *rootcell)
+int afs_cell_init(struct afs_net *net, const char *rootcell)
 {
 	struct afs_cell *old_root, *new_root;
-	char *cp;
+	const char *cp, *vllist;
+	size_t len;
 
 	_enter("");
 
@@ -239,222 +422,503 @@ int afs_cell_init(char *rootcell)
 	}
 
 	cp = strchr(rootcell, ':');
-	if (!cp)
+	if (!cp) {
 		_debug("kAFS: no VL server IP addresses specified");
-	else
-		*cp++ = 0;
+		vllist = NULL;
+		len = strlen(rootcell);
+	} else {
+		vllist = cp + 1;
+		len = cp - rootcell;
+	}
 
-	/* allocate a cell record for the root cell */
-	new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false);
+	if (len == 0 || !rootcell[0] || rootcell[0] == '.' || rootcell[len - 1] == '.')
+		return -EINVAL;
+	if (memchr(rootcell, '/', len))
+		return -EINVAL;
+	cp = strstr(rootcell, "..");
+	if (cp && cp < rootcell + len)
+		return -EINVAL;
+
+	/* allocate a cell record for the root/workstation cell */
+	new_root = afs_lookup_cell(net, rootcell, len, vllist,
+				   AFS_LOOKUP_CELL_ROOTCELL,
+				   afs_cell_trace_use_lookup_ws);
 	if (IS_ERR(new_root)) {
 		_leave(" = %ld", PTR_ERR(new_root));
 		return PTR_ERR(new_root);
 	}
 
+	if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags))
+		afs_use_cell(new_root, afs_cell_trace_use_pin);
+
 	/* install the new cell */
-	write_lock(&afs_cells_lock);
-	old_root = afs_cell_root;
-	afs_cell_root = new_root;
-	write_unlock(&afs_cells_lock);
-	afs_put_cell(old_root);
+	down_write(&net->cells_lock);
+	old_root = rcu_replace_pointer(net->ws_cell, new_root,
+				       lockdep_is_held(&net->cells_lock));
+	up_write(&net->cells_lock);
 
+	afs_unuse_cell(old_root, afs_cell_trace_unuse_ws);
 	_leave(" = 0");
 	return 0;
 }
 
 /*
- * lookup a cell record
+ * Update a cell's VL server address list from the DNS.
  */
-struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
-				 bool dns_cell)
+static int afs_update_cell(struct afs_cell *cell)
 {
-	struct afs_cell *cell;
-
-	_enter("\"%*.*s\",", namesz, namesz, name ?: "");
-
-	down_read(&afs_cells_sem);
-	read_lock(&afs_cells_lock);
+	struct afs_vlserver_list *vllist, *old = NULL, *p;
+	unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+	unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+	time64_t now, expiry = 0;
+	int ret = 0;
+
+	_enter("%s", cell->name);
+
+	vllist = afs_dns_query(cell, &expiry);
+	if (IS_ERR(vllist)) {
+		ret = PTR_ERR(vllist);
+
+		_debug("%s: fail %d", cell->name, ret);
+		if (ret == -ENOMEM)
+			goto out_wake;
+
+		vllist = afs_alloc_vlserver_list(0);
+		if (!vllist) {
+			if (ret >= 0)
+				ret = -ENOMEM;
+			goto out_wake;
+		}
 
-	if (name) {
-		/* if the cell was named, look for it in the cell record list */
-		list_for_each_entry(cell, &afs_cells, link) {
-			if (strncmp(cell->name, name, namesz) == 0) {
-				afs_get_cell(cell);
-				goto found;
-			}
+		switch (ret) {
+		case -ENODATA:
+		case -EDESTADDRREQ:
+			vllist->status = DNS_LOOKUP_GOT_NOT_FOUND;
+			break;
+		case -EAGAIN:
+		case -ECONNREFUSED:
+			vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE;
+			break;
+		default:
+			vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE;
+			break;
 		}
-		cell = ERR_PTR(-ENOENT);
-		if (dns_cell)
-			goto create_cell;
-	found:
-		;
-	} else {
-		cell = afs_cell_root;
-		if (!cell) {
-			/* this should not happen unless user tries to mount
-			 * when root cell is not set. Return an impossibly
-			 * bizarre errno to alert the user. Things like
-			 * ENOENT might be "more appropriate" but they happen
-			 * for other reasons.
+	}
+
+	_debug("%s: got list %d %d", cell->name, vllist->source, vllist->status);
+	cell->dns_status = vllist->status;
+
+	now = ktime_get_real_seconds();
+	if (min_ttl > max_ttl)
+		max_ttl = min_ttl;
+	if (expiry < now + min_ttl)
+		expiry = now + min_ttl;
+	else if (expiry > now + max_ttl)
+		expiry = now + max_ttl;
+
+	_debug("%s: status %d", cell->name, vllist->status);
+	if (vllist->source == DNS_RECORD_UNAVAILABLE) {
+		switch (vllist->status) {
+		case DNS_LOOKUP_GOT_NOT_FOUND:
+			/* The DNS said that the cell does not exist or there
+			 * weren't any addresses to be had.
 			 */
-			cell = ERR_PTR(-EDESTADDRREQ);
-		} else {
-			afs_get_cell(cell);
+			cell->dns_expiry = expiry;
+			break;
+
+		case DNS_LOOKUP_BAD:
+		case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+		case DNS_LOOKUP_GOT_TEMP_FAILURE:
+		case DNS_LOOKUP_GOT_NS_FAILURE:
+		default:
+			cell->dns_expiry = now + 10;
+			break;
 		}
+	} else {
+		cell->dns_expiry = expiry;
+	}
 
+	/* Replace the VL server list if the new record has servers or the old
+	 * record doesn't.
+	 */
+	write_lock(&cell->vl_servers_lock);
+	p = rcu_dereference_protected(cell->vl_servers, true);
+	if (vllist->nr_servers > 0 || p->nr_servers == 0) {
+		rcu_assign_pointer(cell->vl_servers, vllist);
+		cell->dns_source = vllist->source;
+		old = p;
 	}
+	write_unlock(&cell->vl_servers_lock);
+	afs_put_vlserverlist(cell->net, old);
 
-	read_unlock(&afs_cells_lock);
-	up_read(&afs_cells_sem);
-	_leave(" = %p", cell);
-	return cell;
+out_wake:
+	smp_store_release(&cell->dns_lookup_count,
+			  cell->dns_lookup_count + 1); /* vs source/status */
+	wake_up_var(&cell->dns_lookup_count);
+	_leave(" = %d", ret);
+	return ret;
+}
 
-create_cell:
-	read_unlock(&afs_cells_lock);
-	up_read(&afs_cells_sem);
+/*
+ * Destroy a cell record
+ */
+static void afs_cell_destroy(struct rcu_head *rcu)
+{
+	struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
+	struct afs_net *net = cell->net;
+	int r;
 
-	cell = afs_cell_create(name, namesz, NULL, true);
+	_enter("%p{%s}", cell, cell->name);
 
-	_leave(" = %p", cell);
+	r = refcount_read(&cell->ref);
+	ASSERTCMP(r, ==, 0);
+	trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free);
+
+	afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
+	afs_unuse_cell(cell->alias_of, afs_cell_trace_unuse_alias);
+	key_put(cell->anonymous_key);
+	idr_remove(&net->cells_dyn_ino, cell->dynroot_ino);
+	kfree(cell->name - 1);
+	kfree(cell);
+
+	afs_dec_cells_outstanding(net);
+	_leave(" [destroyed]");
+}
+
+static void afs_destroy_cell_work(struct work_struct *work)
+{
+	struct afs_cell *cell = container_of(work, struct afs_cell, destroyer);
+
+	afs_see_cell(cell, afs_cell_trace_destroy);
+	timer_delete_sync(&cell->management_timer);
+	cancel_work_sync(&cell->manager);
+	call_rcu(&cell->rcu, afs_cell_destroy);
+}
+
+/*
+ * Get a reference on a cell record.
+ */
+struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+	int r;
+
+	__refcount_inc(&cell->ref, &r);
+	trace_afs_cell(cell->debug_id, r + 1, atomic_read(&cell->active), reason);
 	return cell;
 }
 
-#if 0
 /*
- * try and get a cell record
+ * Drop a reference on a cell record.
  */
-struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
+void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
 {
-	write_lock(&afs_cells_lock);
+	if (cell) {
+		unsigned int debug_id = cell->debug_id;
+		unsigned int a;
+		bool zero;
+		int r;
+
+		a = atomic_read(&cell->active);
+		zero = __refcount_dec_and_test(&cell->ref, &r);
+		trace_afs_cell(debug_id, r - 1, a, reason);
+		if (zero) {
+			a = atomic_read(&cell->active);
+			WARN(a != 0, "Cell active count %u > 0\n", a);
+			WARN_ON(!queue_work(afs_wq, &cell->destroyer));
+		}
+	}
+}
 
-	if (cell && !list_empty(&cell->link))
-		afs_get_cell(cell);
-	else
-		cell = NULL;
+/*
+ * Note a cell becoming more active.
+ */
+struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+	int r, a;
 
-	write_unlock(&afs_cells_lock);
+	__refcount_inc(&cell->ref, &r);
+	a = atomic_inc_return(&cell->active);
+	trace_afs_cell(cell->debug_id, r + 1, a, reason);
 	return cell;
 }
-#endif  /*  0  */
 
 /*
- * destroy a cell record
+ * Record a cell becoming less active.  When the active counter reaches 1, it
+ * is scheduled for destruction, but may get reactivated.
  */
-void afs_put_cell(struct afs_cell *cell)
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason)
 {
+	unsigned int debug_id;
+	time64_t now, expire_delay;
+	bool zero;
+	int r, a;
+
 	if (!cell)
 		return;
 
-	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
-
-	ASSERTCMP(atomic_read(&cell->usage), >, 0);
+	_enter("%s", cell->name);
 
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	write_lock(&afs_cells_lock);
+	now = ktime_get_real_seconds();
+	cell->last_inactive = now;
+	expire_delay = 0;
+	if (cell->vl_servers->nr_servers)
+		expire_delay = afs_cell_gc_delay;
 
-	if (likely(!atomic_dec_and_test(&cell->usage))) {
-		write_unlock(&afs_cells_lock);
-		_leave("");
-		return;
-	}
+	debug_id = cell->debug_id;
+	a = atomic_dec_return(&cell->active);
+	if (!a)
+		/* 'cell' may now be garbage collected. */
+		afs_set_cell_timer(cell, expire_delay);
 
-	ASSERT(list_empty(&cell->servers));
-	ASSERT(list_empty(&cell->vl_list));
+	zero = __refcount_dec_and_test(&cell->ref, &r);
+	trace_afs_cell(debug_id, r - 1, a, reason);
+	if (zero)
+		WARN_ON(!queue_work(afs_wq, &cell->destroyer));
+}
 
-	write_unlock(&afs_cells_lock);
+/*
+ * Note that a cell has been seen.
+ */
+void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+	int r, a;
 
-	wake_up(&afs_cells_freeable_wq);
+	r = refcount_read(&cell->ref);
+	a = atomic_read(&cell->active);
+	trace_afs_cell(cell->debug_id, r, a, reason);
+}
 
-	_leave(" [unused]");
+/*
+ * Queue a cell for management, giving the workqueue a ref to hold.
+ */
+void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason)
+{
+	queue_work(afs_wq, &cell->manager);
 }
 
 /*
- * destroy a cell record
- * - must be called with the afs_cells_sem write-locked
- * - cell->link should have been broken by the caller
+ * Cell-specific management timer.
  */
-static void afs_cell_destroy(struct afs_cell *cell)
+static void afs_cell_timer(struct timer_list *timer)
 {
-	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
+	struct afs_cell *cell = container_of(timer, struct afs_cell, management_timer);
 
-	ASSERTCMP(atomic_read(&cell->usage), >=, 0);
-	ASSERT(list_empty(&cell->link));
+	afs_see_cell(cell, afs_cell_trace_see_mgmt_timer);
+	if (refcount_read(&cell->ref) > 0 && cell->net->live)
+		queue_work(afs_wq, &cell->manager);
+}
 
-	/* wait for everyone to stop using the cell */
-	if (atomic_read(&cell->usage) > 0) {
-		DECLARE_WAITQUEUE(myself, current);
+/*
+ * Set/reduce the cell timer.
+ */
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs)
+{
+	timer_reduce(&cell->management_timer, jiffies + delay_secs * HZ);
+}
 
-		_debug("wait for cell %s", cell->name);
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		add_wait_queue(&afs_cells_freeable_wq, &myself);
+/*
+ * Activate a cell.
+ */
+static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	struct hlist_node **p;
+	struct afs_cell *pcell;
+	int ret;
 
-		while (atomic_read(&cell->usage) > 0) {
-			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
-		}
+	ret = afs_proc_cell_setup(cell);
+	if (ret < 0)
+		return ret;
 
-		remove_wait_queue(&afs_cells_freeable_wq, &myself);
-		set_current_state(TASK_RUNNING);
+	mutex_lock(&net->proc_cells_lock);
+	for (p = &net->proc_cells.first; *p; p = &(*p)->next) {
+		pcell = hlist_entry(*p, struct afs_cell, proc_link);
+		if (strcmp(cell->name, pcell->name) < 0)
+			break;
 	}
 
-	_debug("cell dead");
-	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
-	ASSERT(list_empty(&cell->servers));
-	ASSERT(list_empty(&cell->vl_list));
+	cell->proc_link.pprev = p;
+	cell->proc_link.next = *p;
+	rcu_assign_pointer(*p, &cell->proc_link.next);
+	if (cell->proc_link.next)
+		cell->proc_link.next->pprev = &cell->proc_link.next;
+
+	mutex_unlock(&net->proc_cells_lock);
+	return 0;
+}
+
+/*
+ * Deactivate a cell.
+ */
+static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+	_enter("%s", cell->name);
 
 	afs_proc_cell_remove(cell);
 
-	down_write(&afs_proc_cells_sem);
-	list_del_init(&cell->proc_link);
-	up_write(&afs_proc_cells_sem);
+	mutex_lock(&net->proc_cells_lock);
+	if (!hlist_unhashed(&cell->proc_link))
+		hlist_del_rcu(&cell->proc_link);
+	mutex_unlock(&net->proc_cells_lock);
 
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(cell->cache, 0);
-#endif
-	key_put(cell->anonymous_key);
-	kfree(cell);
+	_leave("");
+}
 
-	_leave(" [destroyed]");
+static bool afs_has_cell_expired(struct afs_cell *cell, time64_t *_next_manage)
+{
+	const struct afs_vlserver_list *vllist;
+	time64_t expire_at = cell->last_inactive;
+	time64_t now = ktime_get_real_seconds();
+
+	if (atomic_read(&cell->active))
+		return false;
+	if (!cell->net->live)
+		return true;
+
+	vllist = rcu_dereference_protected(cell->vl_servers, true);
+	if (vllist && vllist->nr_servers > 0)
+		expire_at += afs_cell_gc_delay;
+
+	if (expire_at <= now)
+		return true;
+	if (expire_at < *_next_manage)
+		*_next_manage = expire_at;
+	return false;
 }
 
 /*
- * purge in-memory cell database on module unload or afs_init() failure
- * - the timeout daemon is stopped before calling this
+ * Manage a cell record, initialising and destroying it, maintaining its DNS
+ * records.
  */
-void afs_cell_purge(void)
+static bool afs_manage_cell(struct afs_cell *cell)
 {
-	struct afs_cell *cell;
+	struct afs_net *net = cell->net;
+	time64_t next_manage = TIME64_MAX;
+	int ret;
 
-	_enter("");
+	_enter("%s", cell->name);
+
+	_debug("state %u", cell->state);
+	switch (cell->state) {
+	case AFS_CELL_SETTING_UP:
+		goto set_up_cell;
+	case AFS_CELL_UNLOOKED:
+	case AFS_CELL_ACTIVE:
+		goto cell_is_active;
+	case AFS_CELL_REMOVING:
+		WARN_ON_ONCE(1);
+		return false;
+	case AFS_CELL_DEAD:
+		return false;
+	default:
+		_debug("bad state %u", cell->state);
+		WARN_ON_ONCE(1); /* Unhandled state */
+		return false;
+	}
 
-	afs_put_cell(afs_cell_root);
+set_up_cell:
+	ret = afs_activate_cell(net, cell);
+	if (ret < 0) {
+		cell->error = ret;
+		goto remove_cell;
+	}
 
-	down_write(&afs_cells_sem);
+	afs_set_cell_state(cell, AFS_CELL_UNLOOKED);
 
-	while (!list_empty(&afs_cells)) {
-		cell = NULL;
+cell_is_active:
+	if (afs_has_cell_expired(cell, &next_manage))
+		goto remove_cell;
 
-		/* remove the next cell from the front of the list */
-		write_lock(&afs_cells_lock);
+	if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+		ret = afs_update_cell(cell);
+		if (ret < 0)
+			cell->error = ret;
+		if (cell->state == AFS_CELL_UNLOOKED)
+			afs_set_cell_state(cell, AFS_CELL_ACTIVE);
+	}
 
-		if (!list_empty(&afs_cells)) {
-			cell = list_entry(afs_cells.next,
-					  struct afs_cell, link);
-			list_del_init(&cell->link);
-		}
+	if (next_manage < TIME64_MAX && cell->net->live) {
+		time64_t now = ktime_get_real_seconds();
 
-		write_unlock(&afs_cells_lock);
+		if (next_manage - now <= 0)
+			afs_queue_cell(cell, afs_cell_trace_queue_again);
+		else
+			afs_set_cell_timer(cell, next_manage - now);
+	}
+	_leave(" [done %u]", cell->state);
+	return false;
 
-		if (cell) {
-			_debug("PURGING CELL %s (%d)",
-			       cell->name, atomic_read(&cell->usage));
+remove_cell:
+	down_write(&net->cells_lock);
 
-			/* now the cell should be left with no references */
-			afs_cell_destroy(cell);
-		}
+	if (atomic_read(&cell->active)) {
+		up_write(&net->cells_lock);
+		goto cell_is_active;
+	}
+
+	/* Make sure that the expiring server records are going to see the fact
+	 * that the cell is caput.
+	 */
+	afs_set_cell_state(cell, AFS_CELL_REMOVING);
+
+	afs_deactivate_cell(net, cell);
+	afs_purge_servers(cell);
+
+	rb_erase(&cell->net_node, &net->cells);
+	afs_see_cell(cell, afs_cell_trace_unuse_delete);
+	up_write(&net->cells_lock);
+
+	/* The root volume is pinning the cell */
+	afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root);
+	cell->root_volume = NULL;
+
+	afs_set_cell_state(cell, AFS_CELL_DEAD);
+	return true;
+}
+
+static void afs_manage_cell_work(struct work_struct *work)
+{
+	struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+	bool final_put;
+
+	afs_see_cell(cell, afs_cell_trace_manage);
+	final_put = afs_manage_cell(cell);
+	afs_see_cell(cell, afs_cell_trace_managed);
+	if (final_put)
+		afs_put_cell(cell, afs_cell_trace_put_final);
+}
+
+/*
+ * Purge in-memory cell database.
+ */
+void afs_cell_purge(struct afs_net *net)
+{
+	struct afs_cell *ws;
+	struct rb_node *cursor;
+
+	_enter("");
+
+	down_write(&net->cells_lock);
+	ws = rcu_replace_pointer(net->ws_cell, NULL,
+				 lockdep_is_held(&net->cells_lock));
+	up_write(&net->cells_lock);
+	afs_unuse_cell(ws, afs_cell_trace_unuse_ws);
+
+	_debug("kick cells");
+	down_read(&net->cells_lock);
+	for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
+		struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node);
+
+		afs_see_cell(cell, afs_cell_trace_purge);
+
+		if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+			afs_unuse_cell(cell, afs_cell_trace_unuse_pin);
+
+		afs_queue_cell(cell, afs_cell_trace_queue_purge);
 	}
+	up_read(&net->cells_lock);
 
-	up_write(&afs_cells_sem);
+	_debug("wait");
+	wait_var_event(&net->cells_outstanding,
+		       !atomic_read(&net->cells_outstanding));
 	_leave("");
 }
diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c
new file mode 100644
index 000000000000..edcbd249d202
--- /dev/null
+++ b/fs/afs/cm_security.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Cache manager security.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <crypto/krb5.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "afs_fs.h"
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c
+#define xdr_round_up(x) (round_up((x), sizeof(__be32)))
+#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32)))
+
+#ifdef CONFIG_RXGK
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+				   struct afs_server *server);
+#endif
+
+/*
+ * Respond to an RxGK challenge, adding appdata.
+ */
+static int afs_respond_to_challenge(struct sk_buff *challenge)
+{
+#ifdef CONFIG_RXGK
+	struct krb5_buffer appdata = {};
+	struct afs_server *server;
+#endif
+	struct rxrpc_peer *peer;
+	unsigned long peer_data;
+	u16 service_id;
+	u8 security_index;
+
+	rxrpc_kernel_query_challenge(challenge, &peer, &peer_data,
+				     &service_id, &security_index);
+
+	_enter("%u,%u", service_id, security_index);
+
+	switch (service_id) {
+		/* We don't send CM_SERVICE RPCs, so don't expect a challenge
+		 * therefrom.
+		 */
+	case FS_SERVICE:
+	case VL_SERVICE:
+	case YFS_FS_SERVICE:
+	case YFS_VL_SERVICE:
+		break;
+	default:
+		pr_warn("Can't respond to unknown challenge %u:%u",
+			service_id, security_index);
+		return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+						     afs_abort_unsupported_sec_class);
+	}
+
+	switch (security_index) {
+#ifdef CONFIG_RXKAD
+	case RXRPC_SECURITY_RXKAD:
+		return rxkad_kernel_respond_to_challenge(challenge);
+#endif
+
+#ifdef CONFIG_RXGK
+	case RXRPC_SECURITY_RXGK:
+		return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+
+	case RXRPC_SECURITY_YFS_RXGK:
+		switch (service_id) {
+		case FS_SERVICE:
+		case YFS_FS_SERVICE:
+			server = (struct afs_server *)peer_data;
+			if (!server->cm_rxgk_appdata.data) {
+				mutex_lock(&server->cm_token_lock);
+				if (!server->cm_rxgk_appdata.data)
+					afs_create_yfs_cm_token(challenge, server);
+				mutex_unlock(&server->cm_token_lock);
+			}
+			if (server->cm_rxgk_appdata.data)
+				appdata = server->cm_rxgk_appdata;
+			break;
+		}
+		return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+#endif
+
+	default:
+		return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+						     afs_abort_unsupported_sec_class);
+	}
+}
+
+/*
+ * Process the OOB message queue, processing challenge packets.
+ */
+void afs_process_oob_queue(struct work_struct *work)
+{
+	struct afs_net *net = container_of(work, struct afs_net, rx_oob_work);
+	struct sk_buff *oob;
+	enum rxrpc_oob_type type;
+
+	while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) {
+		switch (type) {
+		case RXRPC_OOB_CHALLENGE:
+			afs_respond_to_challenge(oob);
+			break;
+		}
+		rxrpc_kernel_free_oob(oob);
+	}
+}
+
+#ifdef CONFIG_RXGK
+/*
+ * Create a securities keyring for the cache manager and attach a key to it for
+ * the RxGK tokens we want to use to secure the callback connection back from
+ * the fileserver.
+ */
+int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+	const struct krb5_enctype *krb5;
+	struct key *ring;
+	key_ref_t key;
+	char K0[32], *desc;
+	int ret;
+
+	ring = keyring_alloc("kafs",
+			     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+			     KEY_POS_SEARCH | KEY_POS_WRITE |
+			     KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH,
+			     KEY_ALLOC_NOT_IN_QUOTA,
+			     NULL, NULL);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
+
+	ret = rxrpc_sock_set_security_keyring(socket->sk, ring);
+	if (ret < 0)
+		goto out;
+
+	ret = -ENOPKG;
+	krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96);
+	if (!krb5)
+		goto out;
+
+	if (WARN_ON_ONCE(krb5->key_len > sizeof(K0)))
+		goto out;
+
+	ret = -ENOMEM;
+	desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u",
+			 YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype);
+	if (!desc)
+		goto out;
+
+	wait_for_random_bytes();
+	get_random_bytes(K0, krb5->key_len);
+
+	key = key_create(make_key_ref(ring, true),
+			 "rxrpc_s", desc,
+			 K0, krb5->key_len,
+			 KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW,
+			 KEY_ALLOC_NOT_IN_QUOTA);
+	kfree(desc);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto out;
+	}
+
+	net->fs_cm_token_key = key_ref_to_ptr(key);
+	ret = 0;
+out:
+	key_put(ring);
+	return ret;
+}
+
+/*
+ * Create an YFS RxGK GSS token to use as a ticket to the specified fileserver.
+ */
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+				   struct afs_server *server)
+{
+	const struct krb5_enctype *conn_krb5, *token_krb5;
+	const struct krb5_buffer *token_key;
+	struct crypto_aead *aead;
+	struct scatterlist sg;
+	struct afs_net *net = server->cell->net;
+	const struct key *key = net->fs_cm_token_key;
+	size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset;
+	__be32 caps[1] = {
+		[0] = htonl(AFS_CAP_ERROR_TRANSLATION),
+	};
+	__be32 *xdr;
+	void *appdata, *K0, *encbase;
+	u32 enctype;
+	int ret;
+
+	if (!key)
+		return -ENOKEY;
+
+	/* Assume that the fileserver is happy to use the same encoding type as
+	 * we were told to use by the token obtained by the user.
+	 */
+	enctype = rxgk_kernel_query_challenge(challenge);
+
+	conn_krb5 = crypto_krb5_find_enctype(enctype);
+	if (!conn_krb5)
+		return -ENOPKG;
+	token_krb5 = key->payload.data[0];
+	token_key = (const struct krb5_buffer *)&key->payload.data[2];
+
+	/* struct rxgk_key {
+	 *	afs_uint32	enctype;
+	 *	opaque		key<>;
+	 * };
+	 */
+	keysize = 4 + xdr_len_object(conn_krb5->key_len);
+
+	/* struct RXGK_AuthName {
+	 *	afs_int32	kind;
+	 *	opaque		data<AUTHDATAMAX>;
+	 *	opaque		display<AUTHPRINTABLEMAX>;
+	 * };
+	 */
+	uuidsize = sizeof(server->uuid);
+	authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0);
+
+	/* struct RXGK_Token {
+	 *	rxgk_key		K0;
+	 *	RXGK_Level		level;
+	 *	rxgkTime		starttime;
+	 *	afs_int32		lifetime;
+	 *	afs_int32		bytelife;
+	 *	rxgkTime		expirationtime;
+	 *	struct RXGK_AuthName	identities<>;
+	 * };
+	 */
+	toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize);
+
+	offset = 0;
+	encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset);
+
+	/* struct RXGK_TokenContainer {
+	 *	afs_int32	kvno;
+	 *	afs_int32	enctype;
+	 *	opaque		encrypted_token<>;
+	 * };
+	 */
+	contsize = 4 + 4 + xdr_len_object(encsize);
+
+	/* struct YFSAppData {
+	 *	opr_uuid	initiatorUuid;
+	 *	opr_uuid	acceptorUuid;
+	 *	Capabilities	caps;
+	 *	afs_int32	enctype;
+	 *	opaque		callbackKey<>;
+	 *	opaque		callbackToken<>;
+	 * };
+	 */
+	adatasize = 16 + 16 +
+		xdr_len_object(sizeof(caps)) +
+		4 +
+		xdr_len_object(conn_krb5->key_len) +
+		xdr_len_object(contsize);
+
+	ret = -ENOMEM;
+	appdata = kzalloc(adatasize, GFP_KERNEL);
+	if (!appdata)
+		goto out;
+	xdr = appdata;
+
+	memcpy(xdr, &net->uuid, 16);		/* appdata.initiatorUuid */
+	xdr += 16 / 4;
+	memcpy(xdr, &server->uuid, 16);		/* appdata.acceptorUuid */
+	xdr += 16 / 4;
+	*xdr++ = htonl(ARRAY_SIZE(caps));	/* appdata.caps.len */
+	memcpy(xdr, &caps, sizeof(caps));	/* appdata.caps */
+	xdr += ARRAY_SIZE(caps);
+	*xdr++ = htonl(conn_krb5->etype);	/* appdata.enctype */
+
+	*xdr++ = htonl(conn_krb5->key_len);	/* appdata.callbackKey.len */
+	K0 = xdr;
+	get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */
+	xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+	*xdr++ = htonl(contsize);		/* appdata.callbackToken.len */
+	*xdr++ = htonl(1);			/* cont.kvno */
+	*xdr++ = htonl(token_krb5->etype);	/* cont.enctype */
+	*xdr++ = htonl(encsize);		/* cont.encrypted_token.len */
+
+	encbase = xdr;
+	xdr += offset / 4;
+	*xdr++ = htonl(conn_krb5->etype);	/* token.K0.enctype */
+	*xdr++ = htonl(conn_krb5->key_len);	/* token.K0.key.len */
+	memcpy(xdr, K0, conn_krb5->key_len);	/* token.K0.key.data */
+	xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+	*xdr++ = htonl(RXRPC_SECURITY_ENCRYPT);	/* token.level */
+	*xdr++ = htonl(0);			/* token.starttime */
+	*xdr++ = htonl(0);			/* " */
+	*xdr++ = htonl(0);			/* token.lifetime */
+	*xdr++ = htonl(0);			/* token.bytelife */
+	*xdr++ = htonl(0);			/* token.expirationtime */
+	*xdr++ = htonl(0);			/* " */
+	*xdr++ = htonl(1);			/* token.identities.count */
+	*xdr++ = htonl(0);			/* token.identities[0].kind */
+	*xdr++ = htonl(uuidsize);		/* token.identities[0].data.len */
+	memcpy(xdr, &server->uuid, uuidsize);
+	xdr += xdr_round_up(uuidsize) / 4;
+	*xdr++ = htonl(0);			/* token.identities[0].display.len */
+
+	xdr = encbase + xdr_round_up(encsize);
+
+	if ((unsigned long)xdr - (unsigned long)appdata != adatasize)
+		pr_err("Appdata size incorrect %lx != %zx\n",
+		       (unsigned long)xdr - (unsigned long)appdata, adatasize);
+
+	aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN,
+					      GFP_KERNEL);
+	if (IS_ERR(aead)) {
+		ret = PTR_ERR(aead);
+		goto out_token;
+	}
+
+	sg_init_one(&sg, encbase, encsize);
+	ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false);
+	if (ret < 0)
+		goto out_aead;
+
+	server->cm_rxgk_appdata.len  = adatasize;
+	server->cm_rxgk_appdata.data = appdata;
+	appdata = NULL;
+
+out_aead:
+	crypto_free_aead(aead);
+out_token:
+	kfree(appdata);
+out:
+	return ret;
+}
+#endif /* CONFIG_RXGK */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 1c8c6cc6de30..1a906805a9e3 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS Cache Manager Service
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -16,21 +12,24 @@
 #include <linux/ip.h>
 #include "internal.h"
 #include "afs_cm.h"
-
-#if 0
-struct workqueue_struct *afs_cm_workqueue;
-#endif  /*  0  */
-
-static int afs_deliver_cb_init_call_back_state(struct afs_call *,
-					       struct sk_buff *, bool);
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
-						struct sk_buff *, bool);
-static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
-						 struct sk_buff *, bool);
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
 static void afs_cm_destructor(struct afs_call *);
+static void SRXAFSCB_CallBack(struct work_struct *);
+static void SRXAFSCB_InitCallBackState(struct work_struct *);
+static void SRXAFSCB_Probe(struct work_struct *);
+static void SRXAFSCB_ProbeUuid(struct work_struct *);
+static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
+
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
 
 /*
  * CB.CallBack operation type
@@ -38,8 +37,8 @@ static void afs_cm_destructor(struct afs_call *);
 static const struct afs_call_type afs_SRXCBCallBack = {
 	.name		= "CB.CallBack",
 	.deliver	= afs_deliver_cb_callback,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_CallBack,
 };
 
 /*
@@ -48,8 +47,8 @@ static const struct afs_call_type afs_SRXCBCallBack = {
 static const struct afs_call_type afs_SRXCBInitCallBackState = {
 	.name		= "CB.InitCallBackState",
 	.deliver	= afs_deliver_cb_init_call_back_state,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_InitCallBackState,
 };
 
 /*
@@ -58,8 +57,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = {
 static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
 	.name		= "CB.InitCallBackState3",
 	.deliver	= afs_deliver_cb_init_call_back_state3,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_InitCallBackState,
 };
 
 /*
@@ -68,8 +67,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
 static const struct afs_call_type afs_SRXCBProbe = {
 	.name		= "CB.Probe",
 	.deliver	= afs_deliver_cb_probe,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_Probe,
 };
 
 /*
@@ -78,8 +77,8 @@ static const struct afs_call_type afs_SRXCBProbe = {
 static const struct afs_call_type afs_SRXCBProbeUuid = {
 	.name		= "CB.ProbeUuid",
 	.deliver	= afs_deliver_cb_probe_uuid,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_ProbeUuid,
 };
 
 /*
@@ -88,8 +87,18 @@ static const struct afs_call_type afs_SRXCBProbeUuid = {
 static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
 	.name		= "CB.TellMeAboutYourself",
 	.deliver	= afs_deliver_cb_tell_me_about_yourself,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_TellMeAboutYourself,
+};
+
+/*
+ * YFS CB.CallBack operation type
+ */
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+	.name		= "YFSCB.CallBack",
+	.deliver	= afs_deliver_yfs_cb_callback,
+	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_CallBack,
 };
 
 /*
@@ -98,11 +107,9 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
  */
 bool afs_cm_incoming_call(struct afs_call *call)
 {
-	u32 operation_id = ntohl(call->operation_ID);
-
-	_enter("{CB.OP %u}", operation_id);
+	_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
 
-	switch (operation_id) {
+	switch (call->operation_ID) {
 	case CBCallBack:
 		call->type = &afs_SRXCBCallBack;
 		return true;
@@ -115,29 +122,44 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	case CBProbe:
 		call->type = &afs_SRXCBProbe;
 		return true;
+	case CBProbeUuid:
+		call->type = &afs_SRXCBProbeUuid;
+		return true;
 	case CBTellMeAboutYourself:
 		call->type = &afs_SRXCBTellMeAboutYourself;
 		return true;
+	case YFSCBCallBack:
+		if (call->service_id != YFS_CM_SERVICE)
+			return false;
+		call->type = &afs_SRXYFSCB_CallBack;
+		return true;
 	default:
 		return false;
 	}
 }
 
 /*
- * clean up a cache manager call
+ * Clean up a cache manager call.
  */
 static void afs_cm_destructor(struct afs_call *call)
 {
-	_enter("");
-
-	afs_put_server(call->server);
-	call->server = NULL;
 	kfree(call->buffer);
 	call->buffer = NULL;
 }
 
 /*
- * allow the fileserver to see if the cache manager is still alive
+ * Abort a service call from within an action function.
+ */
+static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
+				   enum rxrpc_abort_reason why)
+{
+	rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+				abort_code, error, why);
+	afs_set_call_complete(call, error, 0);
+}
+
+/*
+ * The server supplied a list of callbacks that it wanted to break.
  */
 static void SRXAFSCB_CallBack(struct work_struct *work)
 {
@@ -145,71 +167,69 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 
 	_enter("");
 
-	/* be sure to send the reply *before* attempting to spam the AFS server
-	 * with FSFetchStatus requests on the vnodes with broken callbacks lest
-	 * the AFS server get into a vicious cycle of trying to break further
-	 * callbacks because it hadn't received completion of the CBCallBack op
-	 * yet */
-	afs_send_empty_reply(call);
+	/* We need to break the callbacks before sending the reply as the
+	 * server holds up change visibility till it receives our reply so as
+	 * to maintain cache coherency.
+	 */
+	if (call->server) {
+		trace_afs_server(call->server->debug_id,
+				 refcount_read(&call->server->ref),
+				 atomic_read(&call->server->active),
+				 afs_server_trace_callback);
+		afs_break_callbacks(call->server, call->count, call->request);
+	}
 
-	afs_break_callbacks(call->server, call->count, call->request);
+	afs_send_empty_reply(call);
+	afs_put_call(call);
 	_leave("");
 }
 
 /*
  * deliver request data to a CB.CallBack call
  */
-static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
-				   bool last)
+static int afs_deliver_cb_callback(struct afs_call *call)
 {
-	struct afs_callback *cb;
-	struct afs_server *server;
-	struct in_addr addr;
+	struct afs_callback_break *cb;
 	__be32 *bp;
-	u32 tmp;
 	int ret, loop;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the FID array and its count in two steps */
+		fallthrough;
 	case 1:
 		_debug("extract FID count");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("FID count: %u", call->count);
 		if (call->count > AFSCBMAX)
-			return -EBADMSG;
+			return afs_protocol_error(call, afs_eproto_cb_fid_count);
 
-		call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL);
+		call->buffer = kmalloc(array3_size(call->count, 3, 4),
+				       GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
-		call->offset = 0;
+		afs_extract_to_buf(call, call->count * 3 * 4);
 		call->unmarshall++;
 
+		fallthrough;
 	case 2:
 		_debug("extract FID array");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		_debug("unmarshall FID array");
 		call->request = kcalloc(call->count,
-					sizeof(struct afs_callback),
+					sizeof(struct afs_callback_break),
 					GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
@@ -220,76 +240,45 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 			cb->fid.vid	= ntohl(*bp++);
 			cb->fid.vnode	= ntohl(*bp++);
 			cb->fid.unique	= ntohl(*bp++);
-			cb->type	= AFSCM_CB_UNTYPED;
 		}
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the callback array and its count in two steps */
+		fallthrough;
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
-
-		tmp = ntohl(call->tmp);
-		_debug("CB count: %u", tmp);
-		if (tmp != call->count && tmp != 0)
-			return -EBADMSG;
-		call->offset = 0;
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count2 = ntohl(call->tmp);
+		_debug("CB count: %u", call->count2);
+		if (call->count2 != call->count && call->count2 != 0)
+			return afs_protocol_error(call, afs_eproto_cb_count);
+		call->iter = &call->def_iter;
+		iov_iter_discard(&call->def_iter, ITER_DEST, call->count2 * 3 * 4);
 		call->unmarshall++;
-		if (tmp == 0)
-			goto empty_cb_array;
 
+		fallthrough;
 	case 4:
-		_debug("extract CB array");
-		ret = afs_extract_data(call, skb, last, call->request,
-				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		_debug("extract discard %zu/%u",
+		       iov_iter_count(call->iter), call->count2 * 3 * 4);
 
-		_debug("unmarshall CB array");
-		cb = call->request;
-		bp = call->buffer;
-		for (loop = call->count; loop > 0; loop--, cb++) {
-			cb->version	= ntohl(*bp++);
-			cb->expiry	= ntohl(*bp++);
-			cb->type	= ntohl(*bp++);
-		}
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
 
-	empty_cb_array:
-		call->offset = 0;
 		call->unmarshall++;
+		fallthrough;
 
 	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
-	call->state = AFS_CALL_REPLYING;
-
-	/* we'll need the file server record as that tells us which set of
-	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
-	if (!server)
-		return -ENOTCONN;
-	call->server = server;
-
-	INIT_WORK(&call->work, SRXAFSCB_CallBack);
-	queue_work(afs_wq, &call->work);
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
 	return 0;
 }
 
@@ -302,72 +291,86 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 
 	_enter("{%p}", call->server);
 
-	afs_init_callback_state(call->server);
+	if (call->server)
+		afs_init_callback_state(call->server);
 	afs_send_empty_reply(call);
+	afs_put_call(call);
 	_leave("");
 }
 
 /*
  * deliver request data to a CB.InitCallBackState call
  */
-static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
-					       struct sk_buff *skb,
-					       bool last)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
-	struct afs_server *server;
-	struct in_addr addr;
-
-	_enter(",{%u},%d", skb->len, last);
-
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
-
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
-
-	/* we'll need the file server record as that tells us which set of
-	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
-	if (!server)
-		return -ENOTCONN;
-	call->server = server;
+	_enter("");
 
-	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
-	queue_work(afs_wq, &call->work);
-	return 0;
+	afs_extract_discard(call, 0);
+	return afs_extract_data(call, false);
 }
 
 /*
  * deliver request data to a CB.InitCallBackState3 call
  */
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
-						struct sk_buff *skb,
-						bool last)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
-	struct afs_server *server;
-	struct in_addr addr;
+	struct afs_uuid *r;
+	unsigned loop;
+	__be32 *b;
+	int ret;
 
-	_enter(",{%u},%d", skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	if (!last)
-		return 0;
+	switch (call->unmarshall) {
+	case 0:
+		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
+		call->unmarshall++;
+
+		fallthrough;
+	case 1:
+		_debug("extract UUID");
+		ret = afs_extract_data(call, false);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		_debug("unmarshall UUID");
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		b = call->buffer;
+		r = call->request;
+		r->time_low			= b[0];
+		r->time_mid			= htons(ntohl(b[1]));
+		r->time_hi_and_version		= htons(ntohl(b[2]));
+		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
+		r->clock_seq_low		= ntohl(b[4]);
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+		for (loop = 0; loop < 6; loop++)
+			r->node[loop] = ntohl(b[loop + 5]);
 
-	/* we'll need the file server record as that tells us which set of
-	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
-	if (!server)
-		return -ENOTCONN;
-	call->server = server;
+		call->unmarshall++;
+		fallthrough;
+
+	case 2:
+		break;
+	}
+
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
+
+	if (memcmp(call->request, &call->server->_uuid, sizeof(call->server->_uuid)) != 0) {
+		pr_notice("Callback UUID does not match fileserver UUID\n");
+		trace_afs_cm_no_server_u(call, call->request);
+		return 0;
+	}
 
-	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
-	queue_work(afs_wq, &call->work);
 	return 0;
 }
 
@@ -380,84 +383,73 @@ static void SRXAFSCB_Probe(struct work_struct *work)
 
 	_enter("");
 	afs_send_empty_reply(call);
+	afs_put_call(call);
 	_leave("");
 }
 
 /*
  * deliver request data to a CB.Probe call
  */
-static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
-				bool last)
+static int afs_deliver_cb_probe(struct afs_call *call)
 {
-	_enter(",{%u},%d", skb->len, last);
+	int ret;
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	_enter("");
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
+	if (ret < 0)
+		return ret;
 
-	INIT_WORK(&call->work, SRXAFSCB_Probe);
-	queue_work(afs_wq, &call->work);
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
 	return 0;
 }
 
 /*
- * allow the fileserver to quickly find out if the fileserver has been rebooted
+ * Allow the fileserver to quickly find out if the cache manager has been
+ * rebooted.
  */
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
 	struct afs_call *call = container_of(work, struct afs_call, work);
 	struct afs_uuid *r = call->request;
 
-	struct {
-		__be32	match;
-	} reply;
-
 	_enter("");
 
-
-	if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
-		reply.match = htonl(0);
+	if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
+		afs_send_empty_reply(call);
 	else
-		reply.match = htonl(1);
+		afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative);
 
-	afs_send_simple_reply(call, &reply, sizeof(reply));
+	afs_put_call(call);
 	_leave("");
 }
 
 /*
  * deliver request data to a CB.ProbeUuid call
  */
-static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
-				     bool last)
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
 	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
-		call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
+		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
 		call->unmarshall++;
 
+		fallthrough;
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       11 * sizeof(__be32));
+		ret = afs_extract_data(call, false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -471,32 +463,24 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
 
 		b = call->buffer;
 		r = call->request;
-		r->time_low			= ntohl(b[0]);
-		r->time_mid			= ntohl(b[1]);
-		r->time_hi_and_version		= ntohl(b[2]);
+		r->time_low			= b[0];
+		r->time_mid			= htons(ntohl(b[1]));
+		r->time_hi_and_version		= htons(ntohl(b[2]));
 		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
 		r->clock_seq_low		= ntohl(b[4]);
 
 		for (loop = 0; loop < 6; loop++)
 			r->node[loop] = ntohl(b[loop + 5]);
 
-		call->offset = 0;
 		call->unmarshall++;
+		fallthrough;
 
 	case 2:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
-	call->state = AFS_CALL_REPLYING;
-
-	INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
-	queue_work(afs_wq, &call->work);
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
 	return 0;
 }
 
@@ -505,9 +489,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
  */
 static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 {
-	struct afs_interface *ifs;
 	struct afs_call *call = container_of(work, struct afs_call, work);
-	int loop, nifs;
+	int loop;
 
 	struct {
 		struct /* InterfaceAddr */ {
@@ -525,61 +508,112 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 
 	_enter("");
 
-	nifs = 0;
-	ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
-	if (ifs) {
-		nifs = afs_get_ipv4_interfaces(ifs, 32, false);
-		if (nifs < 0) {
-			kfree(ifs);
-			ifs = NULL;
-			nifs = 0;
-		}
-	}
-
 	memset(&reply, 0, sizeof(reply));
-	reply.ia.nifs = htonl(nifs);
 
-	reply.ia.uuid[0] = htonl(afs_uuid.time_low);
-	reply.ia.uuid[1] = htonl(afs_uuid.time_mid);
-	reply.ia.uuid[2] = htonl(afs_uuid.time_hi_and_version);
-	reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
-	reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
+	reply.ia.uuid[0] = call->net->uuid.time_low;
+	reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
+	reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version));
+	reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved);
+	reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low);
 	for (loop = 0; loop < 6; loop++)
-		reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
-
-	if (ifs) {
-		for (loop = 0; loop < nifs; loop++) {
-			reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
-			reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
-			reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
-		}
-		kfree(ifs);
-	}
+		reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
 
 	reply.cap.capcount = htonl(1);
 	reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
 	afs_send_simple_reply(call, &reply, sizeof(reply));
-
+	afs_put_call(call);
 	_leave("");
 }
 
 /*
  * deliver request data to a CB.TellMeAboutYourself call
  */
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
-						 struct sk_buff *skb, bool last)
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 {
-	_enter(",{%u},%d", skb->len, last);
+	int ret;
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	_enter("");
+
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
+	if (ret < 0)
+		return ret;
+
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
+	return 0;
+}
+
+/*
+ * deliver request data to a YFS CB.CallBack call
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+	struct afs_callback_break *cb;
+	struct yfs_xdr_YFSFid *bp;
+	size_t size;
+	int ret, loop;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
 
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
+		/* extract the FID array and its count in two steps */
+		fallthrough;
+	case 1:
+		_debug("extract FID count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("FID count: %u", call->count);
+		if (call->count > YFSCBMAX)
+			return afs_protocol_error(call, afs_eproto_cb_fid_count);
+
+		size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+		call->buffer = kmalloc(size, GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		afs_extract_to_buf(call, size);
+		call->unmarshall++;
+
+		fallthrough;
+	case 2:
+		_debug("extract FID array");
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall FID array");
+		call->request = kcalloc(call->count,
+					sizeof(struct afs_callback_break),
+					GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		cb = call->request;
+		bp = call->buffer;
+		for (loop = call->count; loop > 0; loop--, cb++) {
+			cb->fid.vid	= xdr_to_u64(bp->volume);
+			cb->fid.vnode	= xdr_to_u64(bp->vnode.lo);
+			cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+			cb->fid.unique	= ntohl(bp->vnode.unique);
+			bp++;
+		}
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+	case 3:
+		break;
+	}
 
-	INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
-	queue_work(afs_wq, &call->work);
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
 	return 0;
 }
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 34494fbead0a..f4e9e12373ac 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1,49 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* dir.c: AFS filesystem directory handling
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
+#include <linux/iversion.h>
+#include <linux/iov_iter.h>
+#include <linux/task_io_accounting_ops.h>
 #include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
 
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+static int afs_d_revalidate(struct inode *dir, const struct qstr *name,
+			    struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
-static void afs_d_release(struct dentry *dentry);
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      bool excl);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+			      loff_t fpos, u64 ino, unsigned dtype);
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
+		      struct dentry *dentry, umode_t mode, bool excl);
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+				struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
 static int afs_link(struct dentry *from, struct inode *dir,
 		    struct dentry *dentry);
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
-		       const char *content);
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
-		      struct inode *new_dir, struct dentry *new_dentry);
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
+		       struct dentry *dentry, const char *content);
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
+		      struct dentry *old_dentry, struct inode *new_dir,
+		      struct dentry *new_dentry, unsigned int flags);
 
 const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
 	.release	= afs_release,
-	.iterate	= afs_readdir,
+	.iterate_shared	= afs_readdir,
 	.lock		= afs_lock,
 	.llseek		= generic_file_llseek,
 };
@@ -62,151 +67,146 @@ const struct inode_operations afs_dir_inode_operations = {
 	.setattr	= afs_setattr,
 };
 
+const struct address_space_operations afs_dir_aops = {
+	.writepages	= afs_single_writepages,
+};
+
 const struct dentry_operations afs_fs_dentry_operations = {
 	.d_revalidate	= afs_d_revalidate,
 	.d_delete	= afs_d_delete,
 	.d_release	= afs_d_release,
 	.d_automount	= afs_d_automount,
+	.d_iput		= afs_d_iput,
 };
 
-#define AFS_DIR_HASHTBL_SIZE	128
-#define AFS_DIR_DIRENT_SIZE	32
-#define AFS_DIRENT_PER_BLOCK	64
-
-union afs_dirent {
-	struct {
-		uint8_t		valid;
-		uint8_t		unused[1];
-		__be16		hash_next;
-		__be32		vnode;
-		__be32		unique;
-		uint8_t		name[16];
-		uint8_t		overflow[4];	/* if any char of the name (inc
-						 * NUL) reaches here, consume
-						 * the next dirent too */
-	} u;
-	uint8_t	extended_name[32];
+struct afs_lookup_one_cookie {
+	struct dir_context	ctx;
+	struct qstr		name;
+	bool			found;
+	struct afs_fid		fid;
 };
 
-/* AFS directory page header (one at the beginning of every 2048-byte chunk) */
-struct afs_dir_pagehdr {
-	__be16		npages;
-	__be16		magic;
-#define AFS_DIR_MAGIC htons(1234)
-	uint8_t		nentries;
-	uint8_t		bitmap[8];
-	uint8_t		pad[19];
+struct afs_lookup_cookie {
+	struct dir_context	ctx;
+	struct qstr		name;
+	unsigned short		nr_fids;
+	struct afs_fid		fids[50];
 };
 
-/* directory block layout */
-union afs_dir_block {
-
-	struct afs_dir_pagehdr pagehdr;
-
-	struct {
-		struct afs_dir_pagehdr	pagehdr;
-		uint8_t			alloc_ctrs[128];
-		/* dir hash table */
-		uint16_t		hashtable[AFS_DIR_HASHTBL_SIZE];
-	} hdr;
+static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret)
+{
+	if (ret == 0) {
+		struct afs_vnode_cache_aux aux;
+		loff_t i_size = i_size_read(&dvnode->netfs.inode);
+
+		afs_set_cache_aux(dvnode, &aux);
+		fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size);
+	} else {
+		fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+	}
+}
 
-	union afs_dirent dirents[AFS_DIRENT_PER_BLOCK];
-};
+/*
+ * Iterate through a kmapped directory segment, dumping a summary of
+ * the contents.
+ */
+static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len,
+				void *priv, void *priv2)
+{
+	do {
+		union afs_xdr_dir_block *block = iter_base;
 
-/* layout on a linux VM page */
-struct afs_dir_page {
-	union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)];
-};
+		pr_warn("[%05zx] %32phN\n", progress, block);
+		iter_base += AFS_DIR_BLOCK_SIZE;
+		progress += AFS_DIR_BLOCK_SIZE;
+		len -= AFS_DIR_BLOCK_SIZE;
+	} while (len > 0);
 
-struct afs_lookup_cookie {
-	struct dir_context ctx;
-	struct afs_fid	fid;
-	struct qstr name;
-	int		found;
-};
+	return len;
+}
 
 /*
- * check that a directory page is valid
+ * Dump the contents of a directory.
  */
-static inline void afs_dir_check_page(struct inode *dir, struct page *page)
+static void afs_dir_dump(struct afs_vnode *dvnode)
 {
-	struct afs_dir_page *dbuf;
-	loff_t latter;
-	int tmp, qty;
-
-#if 0
-	/* check the page count */
-	qty = desc.size / sizeof(dbuf->blocks[0]);
-	if (qty == 0)
-		goto error;
+	struct iov_iter iter;
+	unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
 
-	if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
-		printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
-		       __func__, dir->i_ino, qty,
-		       ntohs(dbuf->blocks[0].pagehdr.npages));
-		goto error;
-	}
-#endif
+	pr_warn("DIR %llx:%llx is=%llx\n",
+		dvnode->fid.vid, dvnode->fid.vnode, i_size);
 
-	/* determine how many magic numbers there should be in this page */
-	latter = dir->i_size - page_offset(page);
-	if (latter >= PAGE_SIZE)
-		qty = PAGE_SIZE;
-	else
-		qty = latter;
-	qty /= sizeof(union afs_dir_block);
-
-	/* check them */
-	dbuf = page_address(page);
-	for (tmp = 0; tmp < qty; tmp++) {
-		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
-			printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
-			       __func__, dir->i_ino, tmp, qty,
-			       ntohs(dbuf->blocks[tmp].pagehdr.magic));
-			goto error;
-		}
-	}
+	iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+	iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL,
+		       afs_dir_dump_step);
+}
 
-	SetPageChecked(page);
-	return;
+/*
+ * check that a directory folio is valid
+ */
+static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
+				union afs_xdr_dir_block *block)
+{
+	if (block->hdr.magic != AFS_DIR_MAGIC) {
+		pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+		       __func__, dvnode->netfs.inode.i_ino,
+		       progress, ntohs(block->hdr.magic));
+		trace_afs_dir_check_failed(dvnode, progress);
+		trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+		return false;
+	}
 
-error:
-	SetPageChecked(page);
-	SetPageError(page);
+	/* Make sure each block is NUL terminated so we can reasonably
+	 * use string functions on it.  The filenames in the folio
+	 * *should* be NUL-terminated anyway.
+	 */
+	((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
+	afs_stat_v(dvnode, n_read_dir);
+	return true;
 }
 
 /*
- * discard a page cached in the pagecache
+ * Iterate through a kmapped directory segment, checking the content.
  */
-static inline void afs_dir_put_page(struct page *page)
+static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len,
+				 void *priv, void *priv2)
 {
-	kunmap(page);
-	page_cache_release(page);
+	struct afs_vnode *dvnode = priv;
+
+	if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+			 len % AFS_DIR_BLOCK_SIZE))
+		return len;
+
+	do {
+		if (!afs_dir_check_block(dvnode, progress, iter_base))
+			break;
+		iter_base += AFS_DIR_BLOCK_SIZE;
+		len -= AFS_DIR_BLOCK_SIZE;
+	} while (len > 0);
+
+	return len;
 }
 
 /*
- * get a page into the pagecache
+ * Check all the blocks in a directory.
  */
-static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
-				     struct key *key)
+static int afs_dir_check(struct afs_vnode *dvnode)
 {
-	struct page *page;
-	_enter("{%lu},%lu", dir->i_ino, index);
-
-	page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
-	if (!IS_ERR(page)) {
-		kmap(page);
-		if (!PageChecked(page))
-			afs_dir_check_page(dir, page);
-		if (PageError(page))
-			goto fail;
-	}
-	return page;
+	struct iov_iter iter;
+	unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
+	size_t checked = 0;
 
-fail:
-	afs_dir_put_page(page);
-	_leave(" = -EIO");
-	return ERR_PTR(-EIO);
+	if (unlikely(!i_size))
+		return 0;
+
+	iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+	checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL,
+				 afs_dir_check_step);
+	if (checked != i_size) {
+		afs_dir_dump(dvnode);
+		return -EIO;
+	}
+	return 0;
 }
 
 /*
@@ -216,8 +216,8 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 {
 	_enter("{%lu}", inode->i_ino);
 
-	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
 
 	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
 		return -ENOENT;
@@ -226,153 +226,327 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 }
 
 /*
+ * Read a file in a single download.
+ */
+static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
+{
+	struct iov_iter iter;
+	ssize_t ret;
+	loff_t i_size;
+	bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+		       !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+
+	i_size = i_size_read(&dvnode->netfs.inode);
+	if (is_dir) {
+		if (i_size < AFS_DIR_BLOCK_SIZE)
+			return afs_bad(dvnode, afs_file_error_dir_small);
+		if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+			trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+			return -EFBIG;
+		}
+	} else {
+		if (i_size > AFSPATHMAX) {
+			trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+			return -EFBIG;
+		}
+	}
+
+	/* Expand the storage.  TODO: Shrink the storage too. */
+	if (dvnode->directory_size < i_size) {
+		size_t cur_size = dvnode->directory_size;
+
+		ret = netfs_alloc_folioq_buffer(NULL,
+						&dvnode->directory, &cur_size, i_size,
+						mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+		dvnode->directory_size = cur_size;
+		if (ret < 0)
+			return ret;
+	}
+
+	iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size);
+
+	/* AFS requires us to perform the read of a directory synchronously as
+	 * a single unit to avoid issues with the directory contents being
+	 * changed between reads.
+	 */
+	ret = netfs_read_single(&dvnode->netfs.inode, file, &iter);
+	if (ret >= 0) {
+		i_size = i_size_read(&dvnode->netfs.inode);
+		if (i_size > ret) {
+			/* The content has grown, so we need to expand the
+			 * buffer.
+			 */
+			ret = -ESTALE;
+		} else if (is_dir) {
+			int ret2 = afs_dir_check(dvnode);
+
+			if (ret2 < 0)
+				ret = ret2;
+		} else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
+			/* NUL-terminate a symlink. */
+			char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
+
+			symlink[i_size] = 0;
+			kunmap_local(symlink);
+		}
+	}
+
+	return ret;
+}
+
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+{
+	ssize_t ret;
+
+	fscache_use_cookie(afs_vnode_cache(dvnode), false);
+	ret = afs_do_read_single(dvnode, file);
+	fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+	return ret;
+}
+
+/*
+ * Read the directory into a folio_queue buffer in one go, scrubbing the
+ * previous contents.  We return -ESTALE if the caller needs to call us again.
+ */
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+	__acquires(&dvnode->validate_lock)
+{
+	ssize_t ret;
+	loff_t i_size;
+
+	i_size = i_size_read(&dvnode->netfs.inode);
+
+	ret = -ERESTARTSYS;
+	if (down_read_killable(&dvnode->validate_lock) < 0)
+		goto error;
+
+	/* We only need to reread the data if it became invalid - or if we
+	 * haven't read it yet.
+	 */
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+		ret = i_size;
+		goto valid;
+	}
+
+	up_read(&dvnode->validate_lock);
+	if (down_write_killable(&dvnode->validate_lock) < 0)
+		goto error;
+
+	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_invalidate_cache(dvnode, 0);
+
+	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) ||
+	    !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+		trace_afs_reload_dir(dvnode);
+		ret = afs_read_single(dvnode, file);
+		if (ret < 0)
+			goto error_unlock;
+
+		// TODO: Trim excess pages
+
+		set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+		set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+	} else {
+		ret = i_size;
+	}
+
+	downgrade_write(&dvnode->validate_lock);
+valid:
+	return ret;
+
+error_unlock:
+	up_write(&dvnode->validate_lock);
+error:
+	_leave(" = %zd", ret);
+	return ret;
+}
+
+/*
  * deal with one block in an AFS directory
  */
-static int afs_dir_iterate_block(struct dir_context *ctx,
-				 union afs_dir_block *block,
-				 unsigned blkoff)
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+				 struct dir_context *ctx,
+				 union afs_xdr_dir_block *block)
 {
-	union afs_dirent *dire;
-	unsigned offset, next, curr;
+	union afs_xdr_dirent *dire;
+	unsigned int blknum, base, hdr, pos, next, nr_slots;
 	size_t nlen;
 	int tmp;
 
-	_enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block);
+	blknum	= ctx->pos / AFS_DIR_BLOCK_SIZE;
+	base	= blknum * AFS_DIR_SLOTS_PER_BLOCK;
+	hdr	= (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+	pos	= DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base;
 
-	curr = (ctx->pos - blkoff) / sizeof(union afs_dirent);
+	_enter("%llx,%x", ctx->pos, blknum);
 
 	/* walk through the block, an entry at a time */
-	for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries;
-	     offset < AFS_DIRENT_PER_BLOCK;
-	     offset = next
-	     ) {
-		next = offset + 1;
-
+	for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) {
 		/* skip entries marked unused in the bitmap */
-		if (!(block->pagehdr.bitmap[offset / 8] &
-		      (1 << (offset % 8)))) {
-			_debug("ENT[%Zu.%u]: unused",
-			       blkoff / sizeof(union afs_dir_block), offset);
-			if (offset >= curr)
-				ctx->pos = blkoff +
-					next * sizeof(union afs_dirent);
+		if (!(block->hdr.bitmap[slot / 8] &
+		      (1 << (slot % 8)))) {
+			_debug("ENT[%x]: Unused", base + slot);
+			next = slot + 1;
+			if (next >= pos)
+				ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
 			continue;
 		}
 
 		/* got a valid entry */
-		dire = &block->dirents[offset];
+		dire = &block->dirents[slot];
 		nlen = strnlen(dire->u.name,
-			       sizeof(*block) -
-			       offset * sizeof(union afs_dirent));
+			       (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1);
+		if (nlen > AFSNAMEMAX - 1) {
+			_debug("ENT[%x]: Name too long (len %zx)",
+			       base + slot, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_name_too_long);
+		}
 
-		_debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
-		       blkoff / sizeof(union afs_dir_block), offset,
-		       (offset < curr ? "skip" : "fill"),
+		_debug("ENT[%x]: %s %zx \"%s\"",
+		       base + slot, (slot < pos ? "skip" : "fill"),
 		       nlen, dire->u.name);
 
-		/* work out where the next possible entry is */
-		for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) {
-			if (next >= AFS_DIRENT_PER_BLOCK) {
-				_debug("ENT[%Zu.%u]:"
-				       " %u travelled beyond end dir block"
-				       " (len %u/%Zu)",
-				       blkoff / sizeof(union afs_dir_block),
-				       offset, next, tmp, nlen);
-				return -EIO;
-			}
-			if (!(block->pagehdr.bitmap[next / 8] &
-			      (1 << (next % 8)))) {
-				_debug("ENT[%Zu.%u]:"
-				       " %u unmarked extension (len %u/%Zu)",
-				       blkoff / sizeof(union afs_dir_block),
-				       offset, next, tmp, nlen);
-				return -EIO;
-			}
+		nr_slots = afs_dir_calc_slots(nlen);
+		next = slot + nr_slots;
+		if (next > AFS_DIR_SLOTS_PER_BLOCK) {
+			_debug("ENT[%x]: extends beyond end dir block (len %zx)",
+			       base + slot, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_over_end);
+		}
 
-			_debug("ENT[%Zu.%u]: ext %u/%Zu",
-			       blkoff / sizeof(union afs_dir_block),
-			       next, tmp, nlen);
-			next++;
+		/* Check that the name-extension dirents are all allocated */
+		for (tmp = 1; tmp < nr_slots; tmp++) {
+			unsigned int xslot = slot + tmp;
+
+			if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) {
+				_debug("ENT[%x]: Unmarked extension (%x/%x)",
+				       base + slot, tmp, nr_slots);
+				return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
+			}
 		}
 
 		/* skip if starts before the current position */
-		if (offset < curr)
+		if (slot < pos) {
+			if (next > pos)
+				ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
 			continue;
+		}
 
 		/* found the next entry */
 		if (!dir_emit(ctx, dire->u.name, nlen,
 			      ntohl(dire->u.vnode),
-			      ctx->actor == afs_lookup_filldir ?
+			      (ctx->actor == afs_lookup_filldir ||
+			       ctx->actor == afs_lookup_one_filldir)?
 			      ntohl(dire->u.unique) : DT_UNKNOWN)) {
 			_leave(" = 0 [full]");
 			return 0;
 		}
 
-		ctx->pos = blkoff + next * sizeof(union afs_dirent);
+		ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
 	}
 
 	_leave(" = 1 [more]");
 	return 1;
 }
 
+struct afs_dir_iteration_ctx {
+	struct dir_context	*dir_ctx;
+	int			error;
+};
+
 /*
- * iterate through the data blob that lists the contents of an AFS directory
+ * Iterate through a kmapped directory segment.
  */
-static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
-			   struct key *key)
+static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len,
+				   void *priv, void *priv2)
 {
-	union afs_dir_block *dblock;
-	struct afs_dir_page *dbuf;
-	struct page *page;
-	unsigned blkoff, limit;
+	struct afs_dir_iteration_ctx *ctx = priv2;
+	struct afs_vnode *dvnode = priv;
 	int ret;
 
-	_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
-
-	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
-		_leave(" = -ESTALE");
-		return -ESTALE;
+	if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+			 len % AFS_DIR_BLOCK_SIZE)) {
+		pr_err("Mis-iteration prog=%zx len=%zx\n",
+		       progress % AFS_DIR_BLOCK_SIZE,
+		       len % AFS_DIR_BLOCK_SIZE);
+		return len;
 	}
 
-	/* round the file position up to the next entry boundary */
-	ctx->pos += sizeof(union afs_dirent) - 1;
-	ctx->pos &= ~(sizeof(union afs_dirent) - 1);
+	do {
+		ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base);
+		if (ret != 1)
+			break;
 
-	/* walk through the blocks in sequence */
-	ret = 0;
-	while (ctx->pos < dir->i_size) {
-		blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1);
+		ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE);
+		iter_base += AFS_DIR_BLOCK_SIZE;
+		len -= AFS_DIR_BLOCK_SIZE;
+	} while (len > 0);
 
-		/* fetch the appropriate page from the directory */
-		page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key);
-		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
-			break;
-		}
+	return len;
+}
 
-		limit = blkoff & ~(PAGE_SIZE - 1);
+/*
+ * Iterate through the directory folios.
+ */
+static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx)
+{
+	struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx };
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct iov_iter iter;
+	unsigned long long i_size = i_size_read(dir);
 
-		dbuf = page_address(page);
+	/* Round the file position up to the next entry boundary */
+	dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent));
 
-		/* deal with the individual blocks stashed on this page */
-		do {
-			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
-					       sizeof(union afs_dir_block)];
-			ret = afs_dir_iterate_block(ctx, dblock, blkoff);
-			if (ret != 1) {
-				afs_dir_put_page(page);
-				goto out;
-			}
+	if (i_size <= 0 || dir_ctx->pos >= i_size)
+		return 0;
 
-			blkoff += sizeof(union afs_dir_block);
+	iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+	iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE));
 
-		} while (ctx->pos < dir->i_size && blkoff < limit);
+	iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx,
+		       afs_dir_iterate_step);
 
-		afs_dir_put_page(page);
-		ret = 0;
-	}
+	if (ctx.error == -ESTALE)
+		afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale);
+	return ctx.error;
+}
+
+/*
+ * iterate through the data blob that lists the contents of an AFS directory
+ */
+static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+			   struct file *file, afs_dataversion_t *_dir_version)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	int retry_limit = 100;
+	int ret;
+
+	_enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+
+	do {
+		if (--retry_limit < 0) {
+			pr_warn("afs_read_dir(): Too many retries\n");
+			ret = -ESTALE;
+			break;
+		}
+		ret = afs_read_dir(dvnode, file);
+		if (ret < 0) {
+			if (ret != -ESTALE)
+				break;
+			if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+				ret = -ESTALE;
+				break;
+			}
+			continue;
+		}
+		*_dir_version = inode_peek_iversion_raw(dir);
+
+		ret = afs_dir_iterate_contents(dir, ctx);
+		up_read(&dvnode->validate_lock);
+	} while (ret == -ESTALE);
 
-out:
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -382,109 +556,408 @@ out:
  */
 static int afs_readdir(struct file *file, struct dir_context *ctx)
 {
-	return afs_dir_iterate(file_inode(file), 
-			      ctx, file->private_data);
+	afs_dataversion_t dir_version;
+
+	return afs_dir_iterate(file_inode(file), ctx, file, &dir_version);
 }
 
 /*
- * search the directory for a name
+ * Search the directory for a single name
  * - if afs_dir_iterate_block() spots this function, it'll pass the FID
  *   uniquifier through dtype
  */
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
-			      loff_t fpos, u64 ino, unsigned dtype)
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
+				  int nlen, loff_t fpos, u64 ino, unsigned dtype)
 {
-	struct afs_lookup_cookie *cookie = _cookie;
+	struct afs_lookup_one_cookie *cookie =
+		container_of(ctx, struct afs_lookup_one_cookie, ctx);
 
 	_enter("{%s,%u},%s,%u,,%llu,%u",
 	       cookie->name.name, cookie->name.len, name, nlen,
 	       (unsigned long long) ino, dtype);
 
 	/* insanity checks first */
-	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
 
 	if (cookie->name.len != nlen ||
 	    memcmp(cookie->name.name, name, nlen) != 0) {
-		_leave(" = 0 [no]");
-		return 0;
+		_leave(" = true [keep looking]");
+		return true;
 	}
 
 	cookie->fid.vnode = ino;
 	cookie->fid.unique = dtype;
 	cookie->found = 1;
 
-	_leave(" = -1 [found]");
-	return -1;
+	_leave(" = false [found]");
+	return false;
 }
 
 /*
- * do a lookup in a directory
+ * Do a lookup of a single name in a directory
  * - just returns the FID the dentry name maps to if found
  */
-static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
-			 struct afs_fid *fid, struct key *key)
+static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
+			     struct afs_fid *fid,
+			     afs_dataversion_t *_dir_version)
 {
 	struct afs_super_info *as = dir->i_sb->s_fs_info;
-	struct afs_lookup_cookie cookie = {
-		.ctx.actor = afs_lookup_filldir,
-		.name = dentry->d_name,
+	struct afs_lookup_one_cookie cookie = {
+		.ctx.actor = afs_lookup_one_filldir,
+		.name = *name,
 		.fid.vid = as->volume->vid
 	};
 	int ret;
 
-	_enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
+	_enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
 
 	/* search the directory */
-	ret = afs_dir_iterate(dir, &cookie.ctx, key);
+	ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
 	if (ret < 0) {
 		_leave(" = %d [iter]", ret);
 		return ret;
 	}
 
-	ret = -ENOENT;
 	if (!cookie.found) {
 		_leave(" = -ENOENT [not found]");
 		return -ENOENT;
 	}
 
 	*fid = cookie.fid;
-	_leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+	_leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
 	return 0;
 }
 
 /*
- * Try to auto mount the mountpoint with pseudo directory, if the autocell
- * operation is setted.
+ * search the directory for a name
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID
+ *   uniquifier through dtype
+ */
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
+			      int nlen, loff_t fpos, u64 ino, unsigned dtype)
+{
+	struct afs_lookup_cookie *cookie =
+		container_of(ctx, struct afs_lookup_cookie, ctx);
+
+	_enter("{%s,%u},%s,%u,,%llu,%u",
+	       cookie->name.name, cookie->name.len, name, nlen,
+	       (unsigned long long) ino, dtype);
+
+	/* insanity checks first */
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+	if (cookie->nr_fids < 50) {
+		cookie->fids[cookie->nr_fids].vnode	= ino;
+		cookie->fids[cookie->nr_fids].unique	= dtype;
+		cookie->nr_fids++;
+	}
+
+	return cookie->nr_fids < 50;
+}
+
+/*
+ * Deal with the result of a successful lookup operation.  Turn all the files
+ * into inodes and save the first one - which is the one we actually want.
  */
-static struct inode *afs_try_auto_mntpt(
-	int ret, struct dentry *dentry, struct inode *dir, struct key *key,
-	struct afs_fid *fid)
+static void afs_do_lookup_success(struct afs_operation *op)
 {
-	const char *devname = dentry->d_name.name;
-	struct afs_vnode *vnode = AFS_FS_I(dir);
+	struct afs_vnode_param *vp;
+	struct afs_vnode *vnode;
 	struct inode *inode;
+	u32 abort_code;
+	int i;
+
+	_enter("");
+
+	for (i = 0; i < op->nr_files; i++) {
+		switch (i) {
+		case 0:
+			vp = &op->file[0];
+			abort_code = vp->scb.status.abort_code;
+			if (abort_code != 0) {
+				op->call_abort_code = abort_code;
+				afs_op_set_error(op, afs_abort_to_error(abort_code));
+				op->cumul_error.abort_code = abort_code;
+			}
+			break;
+
+		case 1:
+			vp = &op->file[1];
+			break;
+
+		default:
+			vp = &op->more_files[i - 2];
+			break;
+		}
+
+		if (vp->scb.status.abort_code)
+			trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code);
+		if (!vp->scb.have_status && !vp->scb.have_error)
+			continue;
+
+		_debug("do [%u]", i);
+		if (vp->vnode) {
+			if (!test_bit(AFS_VNODE_UNSET, &vp->vnode->flags))
+				afs_vnode_commit_status(op, vp);
+		} else if (vp->scb.status.abort_code == 0) {
+			inode = afs_iget(op, vp);
+			if (!IS_ERR(inode)) {
+				vnode = AFS_FS_I(inode);
+				afs_cache_permit(vnode, op->key,
+						 0 /* Assume vnode->cb_break is 0 */ +
+						 op->cb_v_break,
+						 &vp->scb);
+				vp->vnode = vnode;
+				vp->put_vnode = true;
+			}
+		} else {
+			_debug("- abort %d %llx:%llx.%x",
+			       vp->scb.status.abort_code,
+			       vp->fid.vid, vp->fid.vnode, vp->fid.unique);
+		}
+	}
+
+	_leave("");
+}
+
+static const struct afs_operation_ops afs_inline_bulk_status_operation = {
+	.issue_afs_rpc	= afs_fs_inline_bulk_status,
+	.issue_yfs_rpc	= yfs_fs_inline_bulk_status,
+	.success	= afs_do_lookup_success,
+};
+
+static const struct afs_operation_ops afs_lookup_fetch_status_operation = {
+	.issue_afs_rpc	= afs_fs_fetch_status,
+	.issue_yfs_rpc	= yfs_fs_fetch_status,
+	.success	= afs_do_lookup_success,
+	.aborted	= afs_check_for_remote_deletion,
+};
+
+/*
+ * See if we know that the server we expect to use doesn't support
+ * FS.InlineBulkStatus.
+ */
+static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
+{
+	struct afs_server_list *slist;
+	struct afs_volume *volume = dvnode->volume;
+	struct afs_server *server;
+	bool ret = true;
+	int i;
+
+	if (!test_bit(AFS_VOLUME_MAYBE_NO_IBULK, &volume->flags))
+		return true;
+
+	rcu_read_lock();
+	slist = rcu_dereference(volume->servers);
 
-	_enter("%d, %p{%s}, {%x:%u}, %p",
-	       ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key);
+	for (i = 0; i < slist->nr_servers; i++) {
+		server = slist->servers[i].server;
+		if (server == dvnode->cb_server) {
+			if (test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
+				ret = false;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
 
-	if (ret != -ENOENT ||
-	    !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+/*
+ * Do a lookup in a directory.  We make use of bulk lookup to query a slew of
+ * files in one go and create inodes for them.  The inode of the file we were
+ * asked for is returned.
+ */
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_lookup_cookie *cookie;
+	struct afs_vnode_param *vp;
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+	struct inode *inode = NULL, *ti;
+	afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+	bool supports_ibulk, isnew;
+	long ret;
+	int i;
+
+	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+
+	cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL);
+	if (!cookie)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < ARRAY_SIZE(cookie->fids); i++)
+		cookie->fids[i].vid = dvnode->fid.vid;
+	cookie->ctx.actor = afs_lookup_filldir;
+	cookie->name = dentry->d_name;
+	cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
+			      * and slot 0 for the directory */
+
+	/* Search the directory for the named entry using the hash table... */
+	ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version);
+	if (ret < 0)
 		goto out;
 
-	inode = afs_iget_autocell(dir, devname, strlen(devname), key);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
+	supports_ibulk = afs_server_supports_ibulk(dvnode);
+	if (supports_ibulk) {
+		/* ...then scan linearly from that point for entries to lookup-ahead. */
+		cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE;
+		afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version);
+	}
+
+	dentry->d_fsdata = (void *)(unsigned long)data_version;
+
+	/* Check to see if we already have an inode for the primary fid. */
+	inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
+			 afs_ilookup5_test_by_fid, &cookie->fids[1]);
+	if (inode)
+		goto out; /* We do */
+
+	/* Okay, we didn't find it.  We need to query the server - and whilst
+	 * we're doing that, we're going to attempt to look up a bunch of other
+	 * vnodes also.
+	 */
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
 		goto out;
 	}
 
-	*fid = AFS_FS_I(inode)->fid;
-	_leave("= %p", inode);
-	return inode;
+	afs_op_set_vnode(op, 0, dvnode);
+	afs_op_set_fid(op, 1, &cookie->fids[1]);
+
+	op->nr_files = cookie->nr_fids;
+	_debug("nr_files %u", op->nr_files);
+
+	/* Need space for examining all the selected files */
+	if (op->nr_files > 2) {
+		op->more_files = kvcalloc(op->nr_files - 2,
+					  sizeof(struct afs_vnode_param),
+					  GFP_KERNEL);
+		if (!op->more_files) {
+			afs_op_nomem(op);
+			goto out_op;
+		}
+
+		for (i = 2; i < op->nr_files; i++) {
+			vp = &op->more_files[i - 2];
+			vp->fid = cookie->fids[i];
+
+			/* Find any inodes that already exist and get their
+			 * callback counters.
+			 */
+			ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
+					     afs_ilookup5_test_by_fid, &vp->fid, &isnew);
+			if (!IS_ERR_OR_NULL(ti)) {
+				vnode = AFS_FS_I(ti);
+				vp->dv_before = vnode->status.data_version;
+				vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
+				vp->vnode = vnode;
+				vp->put_vnode = true;
+				vp->speculative = true; /* vnode not locked */
+			}
+		}
+	}
+
+	/* Try FS.InlineBulkStatus first.  Abort codes for the individual
+	 * lookups contained therein are stored in the reply without aborting
+	 * the whole operation.
+	 */
+	afs_op_set_error(op, -ENOTSUPP);
+	if (supports_ibulk) {
+		op->ops = &afs_inline_bulk_status_operation;
+		afs_begin_vnode_operation(op);
+		afs_wait_for_operation(op);
+	}
 
+	if (afs_op_error(op) == -ENOTSUPP) {
+		/* We could try FS.BulkStatus next, but this aborts the entire
+		 * op if any of the lookups fails - so, for the moment, revert
+		 * to FS.FetchStatus for op->file[1].
+		 */
+		op->fetch_status.which = 1;
+		op->ops = &afs_lookup_fetch_status_operation;
+		afs_begin_vnode_operation(op);
+		afs_wait_for_operation(op);
+	}
+
+out_op:
+	if (!afs_op_error(op)) {
+		if (op->file[1].scb.status.abort_code) {
+			afs_op_accumulate_error(op, -ECONNABORTED,
+						op->file[1].scb.status.abort_code);
+		} else {
+			inode = &op->file[1].vnode->netfs.inode;
+			op->file[1].vnode = NULL;
+		}
+	}
+
+	if (op->file[0].scb.have_status)
+		dentry->d_fsdata = (void *)(unsigned long)op->file[0].scb.status.data_version;
+	else
+		dentry->d_fsdata = (void *)(unsigned long)op->file[0].dv_before;
+	ret = afs_put_operation(op);
 out:
-	_leave("= %d", ret);
-	return ERR_PTR(ret);
+	kfree(cookie);
+	_leave("");
+	return inode ?: ERR_PTR(ret);
+}
+
+/*
+ * Look up an entry in a directory with @sys substitution.
+ */
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_sysnames *subs;
+	struct afs_net *net = afs_i2net(dir);
+	struct dentry *ret;
+	char *buf, *p, *name;
+	int len, i;
+
+	_enter("");
+
+	ret = ERR_PTR(-ENOMEM);
+	p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL);
+	if (!buf)
+		goto out_p;
+	if (dentry->d_name.len > 4) {
+		memcpy(p, dentry->d_name.name, dentry->d_name.len - 4);
+		p += dentry->d_name.len - 4;
+	}
+
+	/* There is an ordered list of substitutes that we have to try. */
+	read_lock(&net->sysnames_lock);
+	subs = net->sysnames;
+	refcount_inc(&subs->usage);
+	read_unlock(&net->sysnames_lock);
+
+	for (i = 0; i < subs->nr; i++) {
+		name = subs->subs[i];
+		len = dentry->d_name.len - 4 + strlen(name);
+		if (len >= AFSNAMEMAX) {
+			ret = ERR_PTR(-ENAMETOOLONG);
+			goto out_s;
+		}
+
+		strcpy(p, name);
+		ret = lookup_noperm(&QSTR(buf), dentry->d_parent);
+		if (IS_ERR(ret) || d_is_positive(ret))
+			goto out_s;
+		dput(ret);
+	}
+
+	/* We don't want to d_add() the @sys dentry here as we don't want to
+	 * the cached dentry to hide changes to the sysnames list.
+	 */
+	ret = NULL;
+out_s:
+	afs_put_sysnames(subs);
+	kfree(buf);
+out_p:
+	return ret;
 }
 
 /*
@@ -493,79 +966,89 @@ out:
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags)
 {
-	struct afs_vnode *vnode;
-	struct afs_fid fid;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_fid fid = {};
 	struct inode *inode;
-	struct key *key;
+	struct dentry *d;
 	int ret;
 
-	vnode = AFS_FS_I(dir);
-
-	_enter("{%x:%u},%p{%s},",
-	       vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
+	_enter("{%llx:%llu},%p{%pd},",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
 
-	ASSERTCMP(dentry->d_inode, ==, NULL);
+	ASSERTCMP(d_inode(dentry), ==, NULL);
 
 	if (dentry->d_name.len >= AFSNAMEMAX) {
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+	if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
 		_leave(" = -ESTALE");
 		return ERR_PTR(-ESTALE);
 	}
 
-	key = afs_request_key(vnode->volume->cell);
-	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		return ERR_CAST(key);
-	}
-
-	ret = afs_validate(vnode, key);
+	ret = afs_validate(dvnode, NULL);
 	if (ret < 0) {
-		key_put(key);
+		afs_dir_unuse_cookie(dvnode, ret);
 		_leave(" = %d [val]", ret);
 		return ERR_PTR(ret);
 	}
 
-	ret = afs_do_lookup(dir, dentry, &fid, key);
-	if (ret < 0) {
-		inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid);
-		if (!IS_ERR(inode)) {
-			key_put(key);
-			goto success;
-		}
-
-		ret = PTR_ERR(inode);
-		key_put(key);
-		if (ret == -ENOENT) {
-			d_add(dentry, NULL);
-			_leave(" = NULL [negative]");
-			return NULL;
-		}
-		_leave(" = %d [do]", ret);
-		return ERR_PTR(ret);
+	if (dentry->d_name.len >= 4 &&
+	    dentry->d_name.name[dentry->d_name.len - 4] == '@' &&
+	    dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
+	    dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
+	    dentry->d_name.name[dentry->d_name.len - 1] == 's')
+		return afs_lookup_atsys(dir, dentry);
+
+	afs_stat_v(dvnode, n_lookup);
+	inode = afs_do_lookup(dir, dentry);
+	if (inode == ERR_PTR(-ENOENT))
+		inode = NULL;
+	else if (!IS_ERR_OR_NULL(inode))
+		fid = AFS_FS_I(inode)->fid;
+
+	_debug("splice %p", dentry->d_inode);
+	d = d_splice_alias(inode, dentry);
+	if (!IS_ERR_OR_NULL(d)) {
+		d->d_fsdata = dentry->d_fsdata;
+		trace_afs_lookup(dvnode, &d->d_name, &fid);
+	} else {
+		trace_afs_lookup(dvnode, &dentry->d_name, &fid);
 	}
-	dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
+	_leave("");
+	return d;
+}
 
-	/* instantiate the dentry */
-	inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
-	key_put(key);
-	if (IS_ERR(inode)) {
-		_leave(" = %ld", PTR_ERR(inode));
-		return ERR_CAST(inode);
-	}
+/*
+ * Check the validity of a dentry under RCU conditions.
+ */
+static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry)
+{
+	long dir_version, de_version;
+
+	_enter("%p", dentry);
+
+	if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
+		return -ECHILD;
 
-success:
-	d_add(dentry, inode);
-	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
-	       fid.vnode,
-	       fid.unique,
-	       dentry->d_inode->i_ino,
-	       dentry->d_inode->i_generation);
+	if (!afs_check_validity(dvnode))
+		return -ECHILD;
 
-	return NULL;
+	/* We only need to invalidate a dentry if the server's copy changed
+	 * behind our back.  If we made the change, it's no problem.  Note that
+	 * on a 32-bit system, we only have 32 bits in the dentry to store the
+	 * version.
+	 */
+	dir_version = (long)READ_ONCE(dvnode->status.data_version);
+	de_version = (long)READ_ONCE(dentry->d_fsdata);
+	if (de_version != dir_version) {
+		dir_version = (long)READ_ONCE(dvnode->invalid_before);
+		if (de_version - dir_version < 0)
+			return -ECHILD;
+	}
+
+	return 1; /* Still valid */
 }
 
 /*
@@ -573,71 +1056,83 @@ success:
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
  */
-static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name,
+			    struct dentry *dentry, unsigned int flags)
 {
-	struct afs_vnode *vnode, *dir;
-	struct afs_fid uninitialized_var(fid);
-	struct dentry *parent;
+	struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir);
+	struct afs_fid fid;
+	struct inode *inode;
 	struct key *key;
-	void *dir_version;
+	afs_dataversion_t dir_version, invalid_before;
+	long de_version;
 	int ret;
 
 	if (flags & LOOKUP_RCU)
-		return -ECHILD;
+		return afs_d_revalidate_rcu(dir, dentry);
 
-	vnode = AFS_FS_I(dentry->d_inode);
-
-	if (dentry->d_inode)
-		_enter("{v={%x:%u} n=%s fl=%lx},",
-		       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+	if (d_really_is_positive(dentry)) {
+		vnode = AFS_FS_I(d_inode(dentry));
+		_enter("{v={%llx:%llu} n=%pd fl=%lx},",
+		       vnode->fid.vid, vnode->fid.vnode, dentry,
 		       vnode->flags);
-	else
-		_enter("{neg n=%s}", dentry->d_name.name);
+	} else {
+		_enter("{neg n=%pd}", dentry);
+	}
 
 	key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
 	if (IS_ERR(key))
 		key = NULL;
 
-	/* lock down the parent dentry so we can peer at it */
-	parent = dget_parent(dentry);
-	if (!parent->d_inode)
-		goto out_bad;
-
-	dir = AFS_FS_I(parent->d_inode);
-
 	/* validate the parent directory */
-	if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
-		afs_validate(dir, key);
+	ret = afs_validate(dir, key);
+	if (ret == -ERESTARTSYS) {
+		key_put(key);
+		return ret;
+	}
 
 	if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
-		_debug("%s: parent dir deleted", dentry->d_name.name);
-		goto out_bad;
+		_debug("%pd: parent dir deleted", dentry);
+		goto not_found;
 	}
 
-	dir_version = (void *) (unsigned long) dir->status.data_version;
-	if (dentry->d_fsdata == dir_version)
-		goto out_valid; /* the dir contents are unchanged */
+	/* We only need to invalidate a dentry if the server's copy changed
+	 * behind our back.  If we made the change, it's no problem.  Note that
+	 * on a 32-bit system, we only have 32 bits in the dentry to store the
+	 * version.
+	 */
+	dir_version = dir->status.data_version;
+	de_version = (long)dentry->d_fsdata;
+	if (de_version == (long)dir_version)
+		goto out_valid_noupdate;
+
+	invalid_before = dir->invalid_before;
+	if (de_version - (long)invalid_before >= 0)
+		goto out_valid;
 
 	_debug("dir modified");
+	afs_stat_v(dir, n_reval);
 
 	/* search the directory for this vnode */
-	ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key);
+	ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version);
 	switch (ret) {
 	case 0:
 		/* the filename maps to something */
-		if (!dentry->d_inode)
-			goto out_bad;
-		if (is_bad_inode(dentry->d_inode)) {
-			printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
-			       parent->d_name.name, dentry->d_name.name);
-			goto out_bad;
+		if (d_really_is_negative(dentry))
+			goto not_found;
+		inode = d_inode(dentry);
+		if (is_bad_inode(inode)) {
+			printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
+			       dentry);
+			goto not_found;
 		}
 
+		vnode = AFS_FS_I(inode);
+
 		/* if the vnode ID has changed, then the dirent points to a
 		 * different file */
 		if (fid.vnode != vnode->fid.vnode) {
-			_debug("%s: dirent changed [%u != %u]",
-			       dentry->d_name.name, fid.vnode,
+			_debug("%pd: dirent changed [%llu != %llu]",
+			       dentry, fid.vnode,
 			       vnode->fid.vnode);
 			goto not_found;
 		}
@@ -646,56 +1141,35 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		 * been deleted and replaced, and the original vnode ID has
 		 * been reused */
 		if (fid.unique != vnode->fid.unique) {
-			_debug("%s: file deleted (uq %u -> %u I:%u)",
-			       dentry->d_name.name, fid.unique,
+			_debug("%pd: file deleted (uq %u -> %u I:%u)",
+			       dentry, fid.unique,
 			       vnode->fid.unique,
-			       dentry->d_inode->i_generation);
-			spin_lock(&vnode->lock);
-			set_bit(AFS_VNODE_DELETED, &vnode->flags);
-			spin_unlock(&vnode->lock);
+			       vnode->netfs.inode.i_generation);
 			goto not_found;
 		}
 		goto out_valid;
 
 	case -ENOENT:
 		/* the filename is unknown */
-		_debug("%s: dirent not found", dentry->d_name.name);
-		if (dentry->d_inode)
+		_debug("%pd: dirent not found", dentry);
+		if (d_really_is_positive(dentry))
 			goto not_found;
 		goto out_valid;
 
 	default:
-		_debug("failed to iterate dir %s: %d",
-		       parent->d_name.name, ret);
-		goto out_bad;
+		_debug("failed to iterate parent %pd2: %d", dentry, ret);
+		goto not_found;
 	}
 
 out_valid:
-	dentry->d_fsdata = dir_version;
-out_skip:
-	dput(parent);
+	dentry->d_fsdata = (void *)(unsigned long)dir_version;
+out_valid_noupdate:
 	key_put(key);
 	_leave(" = 1 [valid]");
 	return 1;
 
-	/* the dirent, if it exists, now points to a different vnode */
 not_found:
-	spin_lock(&dentry->d_lock);
-	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
-	spin_unlock(&dentry->d_lock);
-
-out_bad:
-	if (dentry->d_inode) {
-		/* don't unhash if we have submounts */
-		if (have_submounts(dentry))
-			goto out_skip;
-	}
-
-	_debug("dropping dentry %s/%s",
-	       parent->d_name.name, dentry->d_name.name);
-	shrink_dcache_parent(dentry);
-	d_drop(dentry);
-	dput(parent);
+	_debug("dropping dentry %pd2", dentry);
 	key_put(key);
 
 	_leave(" = 0 [bad]");
@@ -710,14 +1184,14 @@ out_bad:
  */
 static int afs_d_delete(const struct dentry *dentry)
 {
-	_enter("%s", dentry->d_name.name);
+	_enter("%pd", dentry);
 
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto zap;
 
-	if (dentry->d_inode &&
-	    (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(dentry->d_inode)->flags) ||
-	     test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags)))
+	if (d_really_is_positive(dentry) &&
+	    (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(d_inode(dentry))->flags) ||
+	     test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(d_inode(dentry))->flags)))
 		goto zap;
 
 	_leave(" = 0 [keep]");
@@ -729,332 +1203,588 @@ zap:
 }
 
 /*
- * handle dentry release
+ * Clean up sillyrename files on dentry removal.
  */
-static void afs_d_release(struct dentry *dentry)
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
 {
-	_enter("%s", dentry->d_name.name);
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		afs_silly_iput(dentry, inode);
+	iput(inode);
 }
 
 /*
- * create a directory on an AFS filesystem
+ * handle dentry release
  */
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+void afs_d_release(struct dentry *dentry)
 {
-	struct afs_file_status status;
-	struct afs_callback cb;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
-	struct key *key;
-	int ret;
+	_enter("%pd", dentry);
+}
 
-	dvnode = AFS_FS_I(dir);
+void afs_check_for_remote_deletion(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
 
-	_enter("{%x:%u},{%s},%ho",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+	switch (afs_op_abort_code(op)) {
+	case VNOVNODE:
+		set_bit(AFS_VNODE_DELETED, &vnode->flags);
+		clear_nlink(&vnode->netfs.inode);
+		afs_break_callback(vnode, afs_cb_break_for_deleted);
+	}
+}
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
+/*
+ * Create a new inode for create/mkdir/symlink
+ */
+static void afs_vnode_new_inode(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_vnode *vnode;
+	struct inode *inode;
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
-		goto error;
-	}
+	_enter("");
 
-	mode |= S_IFDIR;
-	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
-			       mode, &fid, &status, &cb, &server);
-	if (ret < 0)
-		goto mkdir_error;
+	ASSERTCMP(afs_op_error(op), ==, 0);
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
+	inode = afs_iget(op, vp);
 	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
+		/* ENOMEM or EINTR at a really inconvenient time - just abandon
+		 * the new directory on the server.
+		 */
+		afs_op_accumulate_error(op, PTR_ERR(inode), 0);
+		return;
 	}
 
-	/* apply the status report we've got for the new vnode */
 	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
+	set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+	if (S_ISDIR(inode->i_mode))
+		afs_mkdir_init_dir(vnode, dvp->vnode);
+	else if (S_ISLNK(inode->i_mode))
+		afs_init_new_symlink(vnode, op);
+	if (!afs_op_error(op))
+		afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
+	d_instantiate(op->dentry, inode);
+}
+
+static void afs_create_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_vnode_commit_status(op, &op->file[0]);
+	afs_update_dentry_version(op, &op->file[0], op->dentry);
+	afs_vnode_new_inode(op);
+}
+
+static void afs_create_edit_dir(struct afs_operation *op)
+{
+	struct netfs_cache_resources cres = {};
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_vnode *dvnode = dvp->vnode;
+
+	_enter("op=%08x", op->debug_id);
+
+	fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
+	down_write(&dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
+		afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
+				 op->create.reason);
+	up_write(&dvnode->validate_lock);
+	fscache_end_operation(&cres);
+}
+
+static void afs_create_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+
+	if (afs_op_error(op))
+		d_drop(op->dentry);
+}
+
+static const struct afs_operation_ops afs_mkdir_operation = {
+	.issue_afs_rpc	= afs_fs_make_dir,
+	.issue_yfs_rpc	= yfs_fs_make_dir,
+	.success	= afs_create_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_create_edit_dir,
+	.put		= afs_create_put,
+};
+
+/*
+ * create a directory on an AFS filesystem
+ */
+static struct dentry *afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+				struct dentry *dentry, umode_t mode)
+{
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	int ret;
+
+	_enter("{%llx:%llu},{%pd},%ho",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		d_drop(dentry);
+		return ERR_CAST(op);
 	}
-	key_put(key);
-	_leave(" = 0");
-	return 0;
 
-iget_error:
-	afs_put_server(server);
-mkdir_error:
-	key_put(key);
-error:
-	d_drop(dentry);
-	_leave(" = %d", ret);
-	return ret;
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
+	op->dentry	= dentry;
+	op->create.mode	= S_IFDIR | mode;
+	op->create.reason = afs_edit_dir_for_mkdir;
+	op->mtime	= current_time(dir);
+	op->ops		= &afs_mkdir_operation;
+	ret = afs_do_sync_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Remove a subdir from a directory.
+ */
+static void afs_dir_remove_subdir(struct dentry *dentry)
+{
+	if (d_really_is_positive(dentry)) {
+		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+		clear_nlink(&vnode->netfs.inode);
+		set_bit(AFS_VNODE_DELETED, &vnode->flags);
+		afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir);
+		afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed);
+	}
 }
 
+static void afs_rmdir_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_vnode_commit_status(op, &op->file[0]);
+	afs_update_dentry_version(op, &op->file[0], op->dentry);
+}
+
+static void afs_rmdir_edit_dir(struct afs_operation *op)
+{
+	struct netfs_cache_resources cres = {};
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+
+	_enter("op=%08x", op->debug_id);
+	afs_dir_remove_subdir(op->dentry);
+
+	fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
+	down_write(&dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
+		afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+				    afs_edit_dir_for_rmdir);
+	up_write(&dvnode->validate_lock);
+	fscache_end_operation(&cres);
+}
+
+static void afs_rmdir_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	if (op->file[1].vnode)
+		up_write(&op->file[1].vnode->rmdir_lock);
+}
+
+static const struct afs_operation_ops afs_rmdir_operation = {
+	.issue_afs_rpc	= afs_fs_remove_dir,
+	.issue_yfs_rpc	= yfs_fs_remove_dir,
+	.success	= afs_rmdir_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_rmdir_edit_dir,
+	.put		= afs_rmdir_put,
+};
+
 /*
  * remove a directory from an AFS filesystem
  */
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	struct afs_vnode *dvnode, *vnode;
-	struct key *key;
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
+	_enter("{%llx:%llu},{%pd}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
-	_enter("{%x:%u},{%s}",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
-		goto error;
-	}
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
 
-	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
-	if (ret < 0)
-		goto rmdir_error;
+	op->dentry	= dentry;
+	op->ops		= &afs_rmdir_operation;
 
-	if (dentry->d_inode) {
-		vnode = AFS_FS_I(dentry->d_inode);
-		clear_nlink(&vnode->vfs_inode);
-		set_bit(AFS_VNODE_DELETED, &vnode->flags);
-		afs_discard_callback_on_delete(vnode);
+	/* Try to make sure we have a callback promise on the victim. */
+	if (d_really_is_positive(dentry)) {
+		vnode = AFS_FS_I(d_inode(dentry));
+		ret = afs_validate(vnode, op->key);
+		if (ret < 0)
+			goto error;
 	}
 
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	if (vnode) {
+		ret = down_write_killable(&vnode->rmdir_lock);
+		if (ret < 0)
+			goto error;
+		op->file[1].vnode = vnode;
+	}
 
-rmdir_error:
-	key_put(key);
-error:
-	_leave(" = %d", ret);
+	ret = afs_do_sync_operation(op);
+
+	/* Not all systems that can host afs servers have ENOTEMPTY. */
+	if (ret == -EEXIST)
+		ret = -ENOTEMPTY;
+out:
+	afs_dir_unuse_cookie(dvnode, ret);
 	return ret;
+
+error:
+	ret = afs_put_operation(op);
+	goto out;
 }
 
 /*
- * remove a file from an AFS filesystem
+ * Remove a link to a file or symlink from a directory.
+ *
+ * If the file was not deleted due to excess hard links, the fileserver will
+ * break the callback promise on the file - if it had one - before it returns
+ * to us, and if it was deleted, it won't
+ *
+ * However, if we didn't have a callback promise outstanding, or it was
+ * outstanding on a different server, then it won't break it either...
  */
-static int afs_unlink(struct inode *dir, struct dentry *dentry)
+static void afs_dir_remove_link(struct afs_operation *op)
 {
-	struct afs_vnode *dvnode, *vnode;
-	struct key *key;
+	struct afs_vnode *dvnode = op->file[0].vnode;
+	struct afs_vnode *vnode = op->file[1].vnode;
+	struct dentry *dentry = op->dentry;
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
+	if (afs_op_error(op) ||
+	    (op->file[1].scb.have_status && op->file[1].scb.have_error))
+		return;
+	if (d_really_is_positive(dentry))
+		return;
 
-	_enter("{%x:%u},{%s}",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		/* Already done */
+	} else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		write_seqlock(&vnode->cb_lock);
+		drop_nlink(&vnode->netfs.inode);
+		if (vnode->netfs.inode.i_nlink == 0) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			__afs_break_callback(vnode, afs_cb_break_for_unlink);
+		}
+		write_sequnlock(&vnode->cb_lock);
+	} else {
+		afs_break_callback(vnode, afs_cb_break_for_unlink);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
+		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+			_debug("AFS_VNODE_DELETED");
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
-		goto error;
+		ret = afs_validate(vnode, op->key);
+		if (ret != -ESTALE)
+			afs_op_set_error(op, ret);
 	}
 
-	if (dentry->d_inode) {
-		vnode = AFS_FS_I(dentry->d_inode);
+	_debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, afs_op_error(op));
+}
+
+static void afs_unlink_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_check_dir_conflict(op, &op->file[0]);
+	afs_vnode_commit_status(op, &op->file[0]);
+	afs_vnode_commit_status(op, &op->file[1]);
+	afs_update_dentry_version(op, &op->file[0], op->dentry);
+	afs_dir_remove_link(op);
+}
 
-		/* make sure we have a callback promise on the victim */
-		ret = afs_validate(vnode, key);
-		if (ret < 0)
-			goto error;
+static void afs_unlink_edit_dir(struct afs_operation *op)
+{
+	struct netfs_cache_resources cres = {};
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+
+	_enter("op=%08x", op->debug_id);
+	fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
+	down_write(&dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
+		afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+				    afs_edit_dir_for_unlink);
+	up_write(&dvnode->validate_lock);
+	fscache_end_operation(&cres);
+}
+
+static void afs_unlink_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	if (op->unlink.need_rehash && afs_op_error(op) < 0 && afs_op_error(op) != -ENOENT)
+		d_rehash(op->dentry);
+}
+
+static const struct afs_operation_ops afs_unlink_operation = {
+	.issue_afs_rpc	= afs_fs_remove_file,
+	.issue_yfs_rpc	= yfs_fs_remove_file,
+	.success	= afs_unlink_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_unlink_edit_dir,
+	.put		= afs_unlink_put,
+};
+
+/*
+ * Remove a file or symlink from an AFS filesystem.
+ */
+static int afs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+	int ret;
+
+	_enter("{%llx:%llu},{%pd}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
+
+	if (dentry->d_name.len >= AFSNAMEMAX)
+		return -ENAMETOOLONG;
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
+
+	/* Try to make sure we have a callback promise on the victim. */
+	ret = afs_validate(vnode, op->key);
+	if (ret < 0) {
+		afs_op_set_error(op, ret);
+		goto error;
 	}
 
-	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
-	if (ret < 0)
-		goto remove_error;
-
-	if (dentry->d_inode) {
-		/* if the file wasn't deleted due to excess hard links, the
-		 * fileserver will break the callback promise on the file - if
-		 * it had one - before it returns to us, and if it was deleted,
-		 * it won't
-		 *
-		 * however, if we didn't have a callback promise outstanding,
-		 * or it was outstanding on a different server, then it won't
-		 * break it either...
-		 */
-		vnode = AFS_FS_I(dentry->d_inode);
-		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-			_debug("AFS_VNODE_DELETED");
-		if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
-			_debug("AFS_VNODE_CB_BROKEN");
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		ret = afs_validate(vnode, key);
-		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+	spin_lock(&dentry->d_lock);
+	if (d_count(dentry) > 1) {
+		spin_unlock(&dentry->d_lock);
+		/* Start asynchronous writeout of the inode */
+		write_inode_now(d_inode(dentry), 0);
+		afs_op_set_error(op, afs_sillyrename(dvnode, vnode, dentry, op->key));
+		goto error;
+	}
+	if (!d_unhashed(dentry)) {
+		/* Prevent a race with RCU lookup. */
+		__d_drop(dentry);
+		op->unlink.need_rehash = true;
 	}
+	spin_unlock(&dentry->d_lock);
 
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	op->file[1].vnode = vnode;
+	op->file[1].update_ctime = true;
+	op->file[1].op_unlinked = true;
+	op->dentry	= dentry;
+	op->ops		= &afs_unlink_operation;
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	/* If there was a conflict with a third party, check the status of the
+	 * unlinked vnode.
+	 */
+	if (afs_op_error(op) == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+		op->file[1].update_ctime = false;
+		op->fetch_status.which = 1;
+		op->ops = &afs_fetch_status_operation;
+		afs_begin_vnode_operation(op);
+		afs_wait_for_operation(op);
+	}
 
-remove_error:
-	key_put(key);
 error:
-	_leave(" = %d", ret);
+	ret = afs_put_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
 	return ret;
 }
 
+static const struct afs_operation_ops afs_create_operation = {
+	.issue_afs_rpc	= afs_fs_create_file,
+	.issue_yfs_rpc	= yfs_fs_create_file,
+	.success	= afs_create_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_create_edit_dir,
+	.put		= afs_create_put,
+};
+
 /*
  * create a regular file on an AFS filesystem
  */
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		      bool excl)
+static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
+		      struct dentry *dentry, umode_t mode, bool excl)
 {
-	struct afs_file_status status;
-	struct afs_callback cb;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
-	struct key *key;
-	int ret;
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	int ret = -ENAMETOOLONG;
 
-	dvnode = AFS_FS_I(dir);
+	_enter("{%llx:%llu},{%pd},%ho",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
-	_enter("{%x:%u},{%s},%ho,",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
-
-	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
 		goto error;
 	}
 
-	mode |= S_IFREG;
-	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
-			       mode, &fid, &status, &cb, &server);
-	if (ret < 0)
-		goto create_error;
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
-	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
-	}
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
 
-	/* apply the status report we've got for the new vnode */
-	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
-	}
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	op->dentry	= dentry;
+	op->create.mode	= S_IFREG | mode;
+	op->create.reason = afs_edit_dir_for_create;
+	op->mtime	= current_time(dir);
+	op->ops		= &afs_create_operation;
+	ret = afs_do_sync_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
+	return ret;
 
-iget_error:
-	afs_put_server(server);
-create_error:
-	key_put(key);
 error:
 	d_drop(dentry);
 	_leave(" = %d", ret);
 	return ret;
 }
 
+static void afs_link_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+
+	_enter("op=%08x", op->debug_id);
+	op->ctime = dvp->scb.status.mtime_client;
+	afs_vnode_commit_status(op, dvp);
+	afs_vnode_commit_status(op, vp);
+	afs_update_dentry_version(op, dvp, op->dentry);
+	if (op->dentry_2->d_parent == op->dentry->d_parent)
+		afs_update_dentry_version(op, dvp, op->dentry_2);
+	ihold(&vp->vnode->netfs.inode);
+	d_instantiate(op->dentry, &vp->vnode->netfs.inode);
+}
+
+static void afs_link_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	if (afs_op_error(op))
+		d_drop(op->dentry);
+}
+
+static const struct afs_operation_ops afs_link_operation = {
+	.issue_afs_rpc	= afs_fs_link,
+	.issue_yfs_rpc	= yfs_fs_link,
+	.success	= afs_link_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_create_edit_dir,
+	.put		= afs_link_put,
+};
+
 /*
  * create a hard link between files in an AFS filesystem
  */
 static int afs_link(struct dentry *from, struct inode *dir,
 		    struct dentry *dentry)
 {
-	struct afs_vnode *dvnode, *vnode;
-	struct key *key;
-	int ret;
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
+	int ret = -ENAMETOOLONG;
 
-	vnode = AFS_FS_I(from->d_inode);
-	dvnode = AFS_FS_I(dir);
-
-	_enter("{%x:%u},{%x:%u},{%s}",
+	_enter("{%llx:%llu},{%llx:%llu},{%pd}",
 	       vnode->fid.vid, vnode->fid.vnode,
 	       dvnode->fid.vid, dvnode->fid.vnode,
-	       dentry->d_name.name);
+	       dentry);
 
-	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
 		goto error;
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
 		goto error;
 	}
 
-	ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
-	if (ret < 0)
-		goto link_error;
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
 
-	ihold(&vnode->vfs_inode);
-	d_instantiate(dentry, &vnode->vfs_inode);
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	ret = afs_validate(vnode, op->key);
+	if (ret < 0)
+		goto error_op;
+
+	afs_op_set_vnode(op, 0, dvnode);
+	afs_op_set_vnode(op, 1, vnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
+	op->file[1].update_ctime = true;
+
+	op->dentry		= dentry;
+	op->dentry_2		= from;
+	op->ops			= &afs_link_operation;
+	op->create.reason	= afs_edit_dir_for_link;
+	ret = afs_do_sync_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
+	return ret;
 
-link_error:
-	key_put(key);
+error_op:
+	afs_put_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
 error:
 	d_drop(dentry);
 	_leave(" = %d", ret);
 	return ret;
 }
 
+static const struct afs_operation_ops afs_symlink_operation = {
+	.issue_afs_rpc	= afs_fs_symlink,
+	.issue_yfs_rpc	= yfs_fs_symlink,
+	.success	= afs_create_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_create_edit_dir,
+	.put		= afs_create_put,
+};
+
 /*
  * create a symlink in an AFS filesystem
  */
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
-		       const char *content)
+static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
+		       struct dentry *dentry, const char *content)
 {
-	struct afs_file_status status;
-	struct afs_server *server;
-	struct afs_vnode *dvnode, *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
-	struct key *key;
+	struct afs_operation *op;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	int ret;
 
-	dvnode = AFS_FS_I(dir);
-
-	_enter("{%x:%u},{%s},%s",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
+	_enter("{%llx:%llu},{%pd},%s",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry,
 	       content);
 
 	ret = -ENAMETOOLONG;
@@ -1065,95 +1795,428 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	if (strlen(content) >= AFSPATHMAX)
 		goto error;
 
-	key = afs_request_key(dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
 		goto error;
 	}
 
-	ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
-				&fid, &status, &server);
-	if (ret < 0)
-		goto create_error;
+	fscache_use_cookie(afs_vnode_cache(dvnode), true);
 
-	inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
-	if (IS_ERR(inode)) {
-		/* ENOMEM at a really inconvenient time - just abandon the new
-		 * directory on the server */
-		ret = PTR_ERR(inode);
-		goto iget_error;
-	}
+	afs_op_set_vnode(op, 0, dvnode);
+	op->file[0].dv_delta = 1;
 
-	/* apply the status report we've got for the new vnode */
-	vnode = AFS_FS_I(inode);
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	afs_vnode_finalise_status_update(vnode, server);
-	afs_put_server(server);
-
-	d_instantiate(dentry, inode);
-	if (d_unhashed(dentry)) {
-		_debug("not hashed");
-		d_rehash(dentry);
-	}
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	op->dentry		= dentry;
+	op->ops			= &afs_symlink_operation;
+	op->create.reason	= afs_edit_dir_for_symlink;
+	op->create.symlink	= content;
+	op->mtime		= current_time(dir);
+	ret = afs_do_sync_operation(op);
+	afs_dir_unuse_cookie(dvnode, ret);
+	return ret;
 
-iget_error:
-	afs_put_server(server);
-create_error:
-	key_put(key);
 error:
 	d_drop(dentry);
 	_leave(" = %d", ret);
 	return ret;
 }
 
+static void afs_rename_success(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->more_files[0].vnode;
+	struct afs_vnode *new_vnode = op->more_files[1].vnode;
+
+	_enter("op=%08x", op->debug_id);
+
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_check_dir_conflict(op, &op->file[1]);
+	afs_vnode_commit_status(op, &op->file[0]);
+	if (op->file[1].vnode != op->file[0].vnode) {
+		op->ctime = op->file[1].scb.status.mtime_client;
+		afs_vnode_commit_status(op, &op->file[1]);
+	}
+	if (op->more_files[0].scb.have_status)
+		afs_vnode_commit_status(op, &op->more_files[0]);
+	if (op->more_files[1].scb.have_status)
+		afs_vnode_commit_status(op, &op->more_files[1]);
+
+	/* If we're moving a subdir between dirs, we need to update
+	 * its DV counter too as the ".." will be altered.
+	 */
+	if (op->file[0].vnode != op->file[1].vnode) {
+		if (S_ISDIR(vnode->netfs.inode.i_mode)) {
+			u64 new_dv;
+
+			write_seqlock(&vnode->cb_lock);
+
+			new_dv = vnode->status.data_version + 1;
+			trace_afs_set_dv(vnode, new_dv);
+			vnode->status.data_version = new_dv;
+			inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
+
+			write_sequnlock(&vnode->cb_lock);
+		}
+
+		if ((op->rename.rename_flags & RENAME_EXCHANGE) &&
+		    S_ISDIR(new_vnode->netfs.inode.i_mode)) {
+			u64 new_dv;
+
+			write_seqlock(&new_vnode->cb_lock);
+
+			new_dv = new_vnode->status.data_version + 1;
+			new_vnode->status.data_version = new_dv;
+			inode_set_iversion_raw(&new_vnode->netfs.inode, new_dv);
+
+			write_sequnlock(&new_vnode->cb_lock);
+		}
+	}
+}
+
+static void afs_rename_edit_dir(struct afs_operation *op)
+{
+	struct netfs_cache_resources orig_cres = {}, new_cres = {};
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	struct afs_vnode *orig_dvnode = orig_dvp->vnode;
+	struct afs_vnode *new_dvnode = new_dvp->vnode;
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+	struct dentry *old_dentry = op->dentry;
+	struct dentry *new_dentry = op->dentry_2;
+	struct inode *new_inode;
+
+	_enter("op=%08x", op->debug_id);
+
+	if (op->rename.rehash) {
+		d_rehash(op->rename.rehash);
+		op->rename.rehash = NULL;
+	}
+
+	fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode));
+	if (new_dvnode != orig_dvnode)
+		fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode));
+
+	down_write(&orig_dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+	    orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
+		afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
+				    afs_edit_dir_for_rename_0);
+
+	if (new_dvnode != orig_dvnode) {
+		up_write(&orig_dvnode->validate_lock);
+		down_write(&new_dvnode->validate_lock);
+	}
+
+	if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+	    new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta) {
+		if (!op->rename.new_negative)
+			afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
+					    afs_edit_dir_for_rename_1);
+
+		afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
+				 &vnode->fid, afs_edit_dir_for_rename_2);
+	}
+
+	if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+	    new_dvnode != orig_dvnode &&
+	    test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+		afs_edit_dir_update(vnode, &dotdot_name, new_dvnode,
+				    afs_edit_dir_for_rename_sub);
+
+	new_inode = d_inode(new_dentry);
+	if (new_inode) {
+		spin_lock(&new_inode->i_lock);
+		if (S_ISDIR(new_inode->i_mode))
+			clear_nlink(new_inode);
+		else if (new_inode->i_nlink > 0)
+			drop_nlink(new_inode);
+		spin_unlock(&new_inode->i_lock);
+	}
+
+	/* Now we can update d_fsdata on the dentries to reflect their
+	 * new parent's data_version.
+	 */
+	afs_update_dentry_version(op, new_dvp, op->dentry);
+	afs_update_dentry_version(op, new_dvp, op->dentry_2);
+
+	d_move(old_dentry, new_dentry);
+
+	up_write(&new_dvnode->validate_lock);
+	fscache_end_operation(&orig_cres);
+	if (new_dvnode != orig_dvnode)
+		fscache_end_operation(&new_cres);
+}
+
+static void afs_rename_exchange_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	struct afs_vnode *orig_dvnode = orig_dvp->vnode;
+	struct afs_vnode *new_dvnode = new_dvp->vnode;
+	struct afs_vnode *old_vnode = op->more_files[0].vnode;
+	struct afs_vnode *new_vnode = op->more_files[1].vnode;
+	struct dentry *old_dentry = op->dentry;
+	struct dentry *new_dentry = op->dentry_2;
+
+	_enter("op=%08x", op->debug_id);
+
+	if (new_dvnode == orig_dvnode) {
+		down_write(&orig_dvnode->validate_lock);
+		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+		    orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta) {
+			afs_edit_dir_update(orig_dvnode, &old_dentry->d_name,
+					    new_vnode, afs_edit_dir_for_rename_0);
+			afs_edit_dir_update(orig_dvnode, &new_dentry->d_name,
+					    old_vnode, afs_edit_dir_for_rename_1);
+		}
+
+		d_exchange(old_dentry, new_dentry);
+		up_write(&orig_dvnode->validate_lock);
+	} else {
+		down_write(&orig_dvnode->validate_lock);
+		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+		    orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
+			afs_edit_dir_update(orig_dvnode, &old_dentry->d_name,
+					    new_vnode, afs_edit_dir_for_rename_0);
+
+		up_write(&orig_dvnode->validate_lock);
+		down_write(&new_dvnode->validate_lock);
+
+		if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+		    new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta)
+			afs_edit_dir_update(new_dvnode, &new_dentry->d_name,
+					    old_vnode, afs_edit_dir_for_rename_1);
+
+		if (S_ISDIR(old_vnode->netfs.inode.i_mode) &&
+		    test_bit(AFS_VNODE_DIR_VALID, &old_vnode->flags))
+			afs_edit_dir_update(old_vnode, &dotdot_name, new_dvnode,
+					    afs_edit_dir_for_rename_sub);
+
+		if (S_ISDIR(new_vnode->netfs.inode.i_mode) &&
+		    test_bit(AFS_VNODE_DIR_VALID, &new_vnode->flags))
+			afs_edit_dir_update(new_vnode, &dotdot_name, orig_dvnode,
+					    afs_edit_dir_for_rename_sub);
+
+		/* Now we can update d_fsdata on the dentries to reflect their
+		 * new parents' data_version.
+		 */
+		afs_update_dentry_version(op, new_dvp, old_dentry);
+		afs_update_dentry_version(op, orig_dvp, new_dentry);
+
+		d_exchange(old_dentry, new_dentry);
+		up_write(&new_dvnode->validate_lock);
+	}
+}
+
+static void afs_rename_put(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	if (op->rename.rehash)
+		d_rehash(op->rename.rehash);
+	dput(op->rename.tmp);
+	if (afs_op_error(op))
+		d_rehash(op->dentry);
+}
+
+static const struct afs_operation_ops afs_rename_operation = {
+	.issue_afs_rpc	= afs_fs_rename,
+	.issue_yfs_rpc	= yfs_fs_rename,
+	.success	= afs_rename_success,
+	.edit_dir	= afs_rename_edit_dir,
+	.put		= afs_rename_put,
+};
+
+#if 0 /* Autoswitched in yfs_fs_rename_replace(). */
+static const struct afs_operation_ops afs_rename_replace_operation = {
+	.issue_afs_rpc	= NULL,
+	.issue_yfs_rpc	= yfs_fs_rename_replace,
+	.success	= afs_rename_success,
+	.edit_dir	= afs_rename_edit_dir,
+	.put		= afs_rename_put,
+};
+#endif
+
+static const struct afs_operation_ops afs_rename_noreplace_operation = {
+	.issue_afs_rpc	= NULL,
+	.issue_yfs_rpc	= yfs_fs_rename_noreplace,
+	.success	= afs_rename_success,
+	.edit_dir	= afs_rename_edit_dir,
+	.put		= afs_rename_put,
+};
+
+static const struct afs_operation_ops afs_rename_exchange_operation = {
+	.issue_afs_rpc	= NULL,
+	.issue_yfs_rpc	= yfs_fs_rename_exchange,
+	.success	= afs_rename_success,
+	.edit_dir	= afs_rename_exchange_edit_dir,
+	.put		= afs_rename_put,
+};
+
 /*
  * rename a file in an AFS filesystem and/or move it between directories
  */
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
-		      struct inode *new_dir, struct dentry *new_dentry)
+static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
+		      struct dentry *old_dentry, struct inode *new_dir,
+		      struct dentry *new_dentry, unsigned int flags)
 {
-	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
-	struct key *key;
+	struct afs_operation *op;
+	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode, *new_vnode = NULL;
 	int ret;
 
-	vnode = AFS_FS_I(old_dentry->d_inode);
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+		return -EINVAL;
+
+	/* Don't allow silly-rename files be moved around. */
+	if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EINVAL;
+
+	vnode = AFS_FS_I(d_inode(old_dentry));
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
+	if (d_is_positive(new_dentry))
+		new_vnode = AFS_FS_I(d_inode(new_dentry));
 
-	_enter("{%x:%u},{%x:%u},{%x:%u},{%s}",
+	_enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
 	       orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
 	       vnode->fid.vid, vnode->fid.vnode,
 	       new_dvnode->fid.vid, new_dvnode->fid.vnode,
-	       new_dentry->d_name.name);
+	       new_dentry);
 
-	ret = -ENAMETOOLONG;
-	if (new_dentry->d_name.len >= AFSNAMEMAX)
+	op = afs_alloc_operation(NULL, orig_dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	fscache_use_cookie(afs_vnode_cache(orig_dvnode), true);
+	if (new_dvnode != orig_dvnode)
+		fscache_use_cookie(afs_vnode_cache(new_dvnode), true);
+
+	ret = afs_validate(vnode, op->key);
+	afs_op_set_error(op, ret);
+	if (ret < 0)
 		goto error;
 
-	key = afs_request_key(orig_dvnode->volume->cell);
-	if (IS_ERR(key)) {
-		ret = PTR_ERR(key);
+	ret = -ENOMEM;
+	op->more_files = kvcalloc(2, sizeof(struct afs_vnode_param), GFP_KERNEL);
+	if (!op->more_files)
 		goto error;
+
+	afs_op_set_vnode(op, 0, orig_dvnode);
+	afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
+	op->file[0].dv_delta = 1;
+	op->file[1].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[1].modification = true;
+	op->file[0].update_ctime = true;
+	op->file[1].update_ctime = true;
+	op->more_files[0].vnode		= vnode;
+	op->more_files[0].speculative	= true;
+	op->more_files[1].vnode		= new_vnode;
+	op->more_files[1].speculative	= true;
+	op->nr_files = 4;
+
+	op->dentry		= old_dentry;
+	op->dentry_2		= new_dentry;
+	op->rename.rename_flags	= flags;
+	op->rename.new_negative	= d_is_negative(new_dentry);
+
+	if (flags & RENAME_NOREPLACE) {
+		op->ops		= &afs_rename_noreplace_operation;
+	} else if (flags & RENAME_EXCHANGE) {
+		op->ops		= &afs_rename_exchange_operation;
+		d_drop(new_dentry);
+	} else {
+		/* If we might displace the target, we might need to do silly
+		 * rename.
+		 */
+		op->ops	= &afs_rename_operation;
+
+		/* For non-directories, check whether the target is busy and if
+		 * so, make a copy of the dentry and then do a silly-rename.
+		 * If the silly-rename succeeds, the copied dentry is hashed
+		 * and becomes the new target.
+		 */
+		if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+			/* To prevent any new references to the target during
+			 * the rename, we unhash the dentry in advance.
+			 */
+			if (!d_unhashed(new_dentry)) {
+				d_drop(new_dentry);
+				op->rename.rehash = new_dentry;
+			}
+
+			if (d_count(new_dentry) > 2) {
+				/* copy the target dentry's name */
+				op->rename.tmp = d_alloc(new_dentry->d_parent,
+							 &new_dentry->d_name);
+				if (!op->rename.tmp) {
+					afs_op_nomem(op);
+					goto error;
+				}
+
+				ret = afs_sillyrename(new_dvnode,
+						      AFS_FS_I(d_inode(new_dentry)),
+						      new_dentry, op->key);
+				if (ret) {
+					afs_op_set_error(op, ret);
+					goto error;
+				}
+
+				op->dentry_2 = op->rename.tmp;
+				op->rename.rehash = NULL;
+				op->rename.new_negative = true;
+			}
+		}
 	}
 
-	ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
-			       old_dentry->d_name.name,
-			       new_dentry->d_name.name);
-	if (ret < 0)
-		goto rename_error;
-	key_put(key);
-	_leave(" = 0");
-	return 0;
+	/* This bit is potentially nasty as there's a potential race with
+	 * afs_d_revalidate{,_rcu}().  We have to change d_fsdata on the dentry
+	 * to reflect it's new parent's new data_version after the op, but
+	 * d_revalidate may see old_dentry between the op having taken place
+	 * and the version being updated.
+	 *
+	 * So drop the old_dentry for now to make other threads go through
+	 * lookup instead - which we hold a lock against.
+	 */
+	d_drop(old_dentry);
+
+	ret = afs_do_sync_operation(op);
+	if (ret == -ENOTSUPP)
+		ret = -EINVAL;
+out:
+	afs_dir_unuse_cookie(orig_dvnode, ret);
+	if (new_dvnode != orig_dvnode)
+		afs_dir_unuse_cookie(new_dvnode, ret);
+	return ret;
 
-rename_error:
-	key_put(key);
 error:
-	d_drop(new_dentry);
-	_leave(" = %d", ret);
+	ret = afs_put_operation(op);
+	goto out;
+}
+
+/*
+ * Write the file contents to the cache as a single blob.
+ */
+int afs_single_writepages(struct address_space *mapping,
+			  struct writeback_control *wbc)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
+	struct iov_iter iter;
+	bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+		       !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+	int ret = 0;
+
+	/* Need to lock to prevent the folio queue and folios from being thrown
+	 * away.
+	 */
+	down_read(&dvnode->validate_lock);
+
+	if (is_dir ?
+	    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
+	    atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+		iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
+				     i_size_read(&dvnode->netfs.inode));
+		ret = netfs_writeback_single(mapping, wbc, &iter);
+	}
+
+	up_read(&dvnode->validate_lock);
 	return ret;
 }
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
new file mode 100644
index 000000000000..fd3aa9f97ce6
--- /dev/null
+++ b/fs/afs/dir_edit.c
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS filesystem directory editing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/iversion.h>
+#include <linux/folio_queue.h>
+#include "internal.h"
+#include "xdr_fs.h"
+
+/*
+ * Find a number of contiguous clear bits in a directory block bitmask.
+ *
+ * There are 64 slots, which means we can load the entire bitmap into a
+ * variable.  The first bit doesn't count as it corresponds to the block header
+ * slot.  nr_slots is between 1 and 9.
+ */
+static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots)
+{
+	u64 bitmap;
+	u32 mask;
+	int bit, n;
+
+	bitmap  = (u64)block->hdr.bitmap[0] << 0 * 8;
+	bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+	bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+	bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+	bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+	bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+	bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+	bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+	bitmap >>= 1; /* The first entry is metadata */
+	bit = 1;
+	mask = (1 << nr_slots) - 1;
+
+	do {
+		if (sizeof(unsigned long) == 8)
+			n = ffz(bitmap);
+		else
+			n = ((u32)bitmap) != 0 ?
+				ffz((u32)bitmap) :
+				ffz((u32)(bitmap >> 32)) + 32;
+		bitmap >>= n;
+		bit += n;
+
+		if ((bitmap & mask) == 0) {
+			if (bit > 64 - nr_slots)
+				return -1;
+			return bit;
+		}
+
+		n = __ffs(bitmap);
+		bitmap >>= n;
+		bit += n;
+	} while (bitmap);
+
+	return -1;
+}
+
+/*
+ * Set a number of contiguous bits in the directory block bitmap.
+ */
+static void afs_set_contig_bits(union afs_xdr_dir_block *block,
+				int bit, unsigned int nr_slots)
+{
+	u64 mask;
+
+	mask = (1 << nr_slots) - 1;
+	mask <<= bit;
+
+	block->hdr.bitmap[0] |= (u8)(mask >> 0 * 8);
+	block->hdr.bitmap[1] |= (u8)(mask >> 1 * 8);
+	block->hdr.bitmap[2] |= (u8)(mask >> 2 * 8);
+	block->hdr.bitmap[3] |= (u8)(mask >> 3 * 8);
+	block->hdr.bitmap[4] |= (u8)(mask >> 4 * 8);
+	block->hdr.bitmap[5] |= (u8)(mask >> 5 * 8);
+	block->hdr.bitmap[6] |= (u8)(mask >> 6 * 8);
+	block->hdr.bitmap[7] |= (u8)(mask >> 7 * 8);
+}
+
+/*
+ * Clear a number of contiguous bits in the directory block bitmap.
+ */
+static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
+				  int bit, unsigned int nr_slots)
+{
+	u64 mask;
+
+	mask = (1 << nr_slots) - 1;
+	mask <<= bit;
+
+	block->hdr.bitmap[0] &= ~(u8)(mask >> 0 * 8);
+	block->hdr.bitmap[1] &= ~(u8)(mask >> 1 * 8);
+	block->hdr.bitmap[2] &= ~(u8)(mask >> 2 * 8);
+	block->hdr.bitmap[3] &= ~(u8)(mask >> 3 * 8);
+	block->hdr.bitmap[4] &= ~(u8)(mask >> 4 * 8);
+	block->hdr.bitmap[5] &= ~(u8)(mask >> 5 * 8);
+	block->hdr.bitmap[6] &= ~(u8)(mask >> 6 * 8);
+	block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8);
+}
+
+/*
+ * Get a specific block, extending the directory storage to cover it as needed.
+ */
+static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
+{
+	struct folio_queue *fq;
+	struct afs_vnode *dvnode = iter->dvnode;
+	struct folio *folio;
+	size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+	size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+	int ret;
+
+	if (dvnode->directory_size < blend) {
+		size_t cur_size = dvnode->directory_size;
+
+		ret = netfs_alloc_folioq_buffer(
+			NULL, &dvnode->directory, &cur_size, blend,
+			mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+		dvnode->directory_size = cur_size;
+		if (ret < 0)
+			goto fail;
+	}
+
+	fq = iter->fq;
+	if (!fq)
+		fq = dvnode->directory;
+
+	/* Search the folio queue for the folio containing the block... */
+	for (; fq; fq = fq->next) {
+		for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
+			size_t fsize = folioq_folio_size(fq, s);
+
+			if (blend <= fpos + fsize) {
+				/* ... and then return the mapped block. */
+				folio = folioq_folio(fq, s);
+				if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+					goto fail;
+				iter->fq = fq;
+				iter->fq_slot = s;
+				iter->fpos = fpos;
+				return kmap_local_folio(folio, blpos - fpos);
+			}
+			fpos += fsize;
+		}
+		iter->fq_slot = 0;
+	}
+
+fail:
+	iter->fq = NULL;
+	iter->fq_slot = 0;
+	afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+	return NULL;
+}
+
+/*
+ * Scan a directory block looking for a dirent of the right name.
+ */
+static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name,
+			      unsigned int blocknum)
+{
+	const union afs_xdr_dirent *de;
+	u64 bitmap;
+	int d, len, n;
+
+	_enter("");
+
+	bitmap  = (u64)block->hdr.bitmap[0] << 0 * 8;
+	bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+	bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+	bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+	bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+	bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+	bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+	bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+
+	for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+	     d < AFS_DIR_SLOTS_PER_BLOCK;
+	     d++) {
+		if (!((bitmap >> d) & 1))
+			continue;
+		de = &block->dirents[d];
+		if (de->u.valid != 1)
+			continue;
+
+		/* The block was NUL-terminated by afs_dir_check_page(). */
+		len = strlen(de->u.name);
+		if (len == name->len &&
+		    memcmp(de->u.name, name->name, name->len) == 0)
+			return d;
+
+		n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE);
+		n /= AFS_DIR_DIRENT_SIZE;
+		d += n - 1;
+	}
+
+	return -1;
+}
+
+/*
+ * Initialise a new directory block.  Note that block 0 is special and contains
+ * some extra metadata.
+ */
+static void afs_edit_init_block(union afs_xdr_dir_block *meta,
+				union afs_xdr_dir_block *block, int block_num)
+{
+	memset(block, 0, sizeof(*block));
+	block->hdr.npages = htons(1);
+	block->hdr.magic = AFS_DIR_MAGIC;
+	block->hdr.bitmap[0] = 1;
+
+	if (block_num == 0) {
+		block->hdr.bitmap[0] = 0xff;
+		block->hdr.bitmap[1] = 0x1f;
+		memset(block->meta.alloc_ctrs,
+		       AFS_DIR_SLOTS_PER_BLOCK,
+		       sizeof(block->meta.alloc_ctrs));
+		meta->meta.alloc_ctrs[0] =
+			AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0;
+	}
+
+	if (block_num < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[block_num] =
+			AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS;
+}
+
+/*
+ * Edit a directory's file data to add a new directory entry.  Doing this after
+ * create, mkdir, symlink, link or rename if the data version number is
+ * incremented by exactly one avoids the need to re-download the entire
+ * directory contents.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_add(struct afs_vnode *vnode,
+		      const struct qstr *name, struct afs_fid *new_fid,
+		      enum afs_edit_dir_reason why)
+{
+	union afs_xdr_dir_block *meta, *block;
+	union afs_xdr_dirent *de;
+	struct afs_dir_iter iter = { .dvnode = vnode };
+	unsigned int nr_blocks, b, entry;
+	loff_t i_size;
+	int slot;
+
+	_enter(",,{%d,%s},", name->len, name->name);
+
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+	    (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+		afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size);
+		return;
+	}
+
+	meta = afs_dir_get_block(&iter, 0);
+	if (!meta)
+		return;
+
+	/* Work out how many slots we're going to need. */
+	iter.nr_slots = afs_dir_calc_slots(name->len);
+
+	if (i_size == 0)
+		goto new_directory;
+	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+	/* Find a block that has sufficient slots available.  Each folio
+	 * contains two or more directory blocks.
+	 */
+	for (b = 0; b < nr_blocks + 1; b++) {
+		/* If the directory extended into a new folio, then we need to
+		 * tack a new folio on the end.
+		 */
+		if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
+			goto error_too_many_blocks;
+
+		/* Lower dir blocks have a counter in the header we can check. */
+		if (b < AFS_DIR_BLOCKS_WITH_CTR &&
+		    meta->meta.alloc_ctrs[b] < iter.nr_slots)
+			continue;
+
+		block = afs_dir_get_block(&iter, b);
+		if (!block)
+			goto error;
+
+		/* Abandon the edit if we got a callback break. */
+		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			goto already_invalidated;
+
+		_debug("block %u: %2u %3u %u",
+		       b,
+		       (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
+		       ntohs(block->hdr.npages),
+		       ntohs(block->hdr.magic));
+
+		/* Initialise the block if necessary. */
+		if (b == nr_blocks) {
+			_debug("init %u", b);
+			afs_edit_init_block(meta, block, b);
+			afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
+		}
+
+		/* We need to try and find one or more consecutive slots to
+		 * hold the entry.
+		 */
+		slot = afs_find_contig_bits(block, iter.nr_slots);
+		if (slot >= 0) {
+			_debug("slot %u", slot);
+			goto found_space;
+		}
+
+		kunmap_local(block);
+	}
+
+	/* There are no spare slots of sufficient size, yet the operation
+	 * succeeded.  Download the directory again.
+	 */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
+	afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots);
+	goto out_unmap;
+
+new_directory:
+	afs_edit_init_block(meta, meta, 0);
+	i_size = AFS_DIR_BLOCK_SIZE;
+	afs_set_i_size(vnode, i_size);
+	slot = AFS_DIR_RESV_BLOCKS0;
+	block = afs_dir_get_block(&iter, 0);
+	nr_blocks = 1;
+	b = 0;
+
+found_space:
+	/* Set the dirent slot. */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot,
+			   new_fid->vnode, new_fid->unique, name->name);
+	de = &block->dirents[slot];
+	de->u.valid	= 1;
+	de->u.unused[0]	= 0;
+	de->u.hash_next	= 0; // TODO: Really need to maintain this
+	de->u.vnode	= htonl(new_fid->vnode);
+	de->u.unique	= htonl(new_fid->unique);
+	memcpy(de->u.name, name->name, name->len + 1);
+	de->u.name[name->len] = 0;
+
+	/* Adjust the bitmap. */
+	afs_set_contig_bits(block, slot, iter.nr_slots);
+
+	/* Adjust the allocation counter. */
+	if (b < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[b] -= iter.nr_slots;
+
+	/* Adjust the hash chain. */
+	entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot;
+	iter.bucket = afs_dir_hash_name(name);
+	de->u.hash_next = meta->meta.hashtable[iter.bucket];
+	meta->meta.hashtable[iter.bucket] = htons(entry);
+	kunmap_local(block);
+
+	inode_inc_iversion_raw(&vnode->netfs.inode);
+	afs_stat_v(vnode, n_dir_cr);
+	_debug("Insert %s in %u[%u]", name->name, b, slot);
+
+	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+
+out_unmap:
+	kunmap_local(meta);
+	_leave("");
+	return;
+
+already_invalidated:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
+	kunmap_local(block);
+	goto out_unmap;
+
+error_too_many_blocks:
+	afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks);
+error:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
+	goto out_unmap;
+}
+
+/*
+ * Edit a directory's file data to remove a new directory entry.  Doing this
+ * after unlink, rmdir or rename if the data version number is incremented by
+ * exactly one avoids the need to re-download the entire directory contents.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_remove(struct afs_vnode *vnode,
+			 const struct qstr *name, enum afs_edit_dir_reason why)
+{
+	union afs_xdr_dir_block *meta, *block, *pblock;
+	union afs_xdr_dirent *de, *pde;
+	struct afs_dir_iter iter = { .dvnode = vnode };
+	struct afs_fid fid;
+	unsigned int b, slot, entry;
+	loff_t i_size;
+	__be16 next;
+	int found;
+
+	_enter(",,{%d,%s},", name->len, name->name);
+
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (i_size < AFS_DIR_BLOCK_SIZE ||
+	    i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+	    (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+		afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size);
+		return;
+	}
+
+	if (!afs_dir_init_iter(&iter, name))
+		return;
+
+	meta = afs_dir_find_block(&iter, 0);
+	if (!meta)
+		return;
+
+	/* Find the entry in the blob. */
+	found = afs_dir_search_bucket(&iter, name, &fid);
+	if (found < 0) {
+		/* Didn't find the dirent to clobber.  Re-download. */
+		trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+				   0, 0, 0, 0, name->name);
+		afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name);
+		goto out_unmap;
+	}
+
+	entry = found;
+	b    = entry / AFS_DIR_SLOTS_PER_BLOCK;
+	slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
+
+	block = afs_dir_find_block(&iter, b);
+	if (!block)
+		goto error;
+	if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+		goto already_invalidated;
+
+	/* Check and clear the entry. */
+	de = &block->dirents[slot];
+	if (de->u.valid != 1)
+		goto error_unmap;
+
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
+			   ntohl(de->u.vnode), ntohl(de->u.unique),
+			   name->name);
+
+	/* Adjust the bitmap. */
+	afs_clear_contig_bits(block, slot, iter.nr_slots);
+
+	/* Adjust the allocation counter. */
+	if (b < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[b] += iter.nr_slots;
+
+	/* Clear the constituent entries. */
+	next = de->u.hash_next;
+	memset(de, 0, sizeof(*de) * iter.nr_slots);
+	kunmap_local(block);
+
+	/* Adjust the hash chain: if iter->prev_entry is 0, the hashtable head
+	 * index is previous; otherwise it's slot number of the previous entry.
+	 */
+	if (!iter.prev_entry) {
+		__be16 prev_next = meta->meta.hashtable[iter.bucket];
+
+		if (unlikely(prev_next != htons(entry))) {
+			pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s",
+				vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+				iter.bucket, iter.prev_entry, prev_next, entry,
+				name->len, name->name);
+			goto error;
+		}
+		meta->meta.hashtable[iter.bucket] = next;
+	} else {
+		unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK;
+		unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK;
+		__be16 prev_next;
+
+		pblock = afs_dir_find_block(&iter, pb);
+		if (!pblock)
+			goto error;
+		pde = &pblock->dirents[ps];
+		prev_next = pde->u.hash_next;
+		if (prev_next != htons(entry)) {
+			kunmap_local(pblock);
+			pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s",
+				vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+				iter.bucket, iter.prev_entry, prev_next, entry,
+				name->len, name->name);
+			goto error;
+		}
+		pde->u.hash_next = next;
+		kunmap_local(pblock);
+	}
+
+	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+
+	inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+	afs_stat_v(vnode, n_dir_rm);
+	_debug("Remove %s from %u[%u]", name->name, b, slot);
+
+out_unmap:
+	kunmap_local(meta);
+	_leave("");
+	return;
+
+already_invalidated:
+	kunmap_local(block);
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
+			   0, 0, 0, 0, name->name);
+	goto out_unmap;
+
+error_unmap:
+	kunmap_local(block);
+error:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
+			   0, 0, 0, 0, name->name);
+	goto out_unmap;
+}
+
+/*
+ * Edit an entry in a directory to update the vnode it refers to.  This is also
+ * used to update the ".." entry in a directory.
+ */
+void afs_edit_dir_update(struct afs_vnode *vnode, const struct qstr *name,
+			 struct afs_vnode *new_dvnode, enum afs_edit_dir_reason why)
+{
+	union afs_xdr_dir_block *block;
+	union afs_xdr_dirent *de;
+	struct afs_dir_iter iter = { .dvnode = vnode };
+	unsigned int nr_blocks, b;
+	loff_t i_size;
+	int slot;
+
+	_enter("");
+
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (i_size < AFS_DIR_BLOCK_SIZE) {
+		afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
+		return;
+	}
+
+	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+	/* Find a block that has sufficient slots available.  Each folio
+	 * contains two or more directory blocks.
+	 */
+	for (b = 0; b < nr_blocks; b++) {
+		block = afs_dir_get_block(&iter, b);
+		if (!block)
+			goto error;
+
+		/* Abandon the edit if we got a callback break. */
+		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			goto already_invalidated;
+
+		slot = afs_dir_scan_block(block, name, b);
+		if (slot >= 0)
+			goto found_dirent;
+
+		kunmap_local(block);
+	}
+
+	/* Didn't find the dirent to clobber.  Download the directory again. */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
+			   0, 0, 0, 0, name->name);
+	afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd);
+	goto out;
+
+found_dirent:
+	de = &block->dirents[slot];
+	de->u.vnode  = htonl(new_dvnode->fid.vnode);
+	de->u.unique = htonl(new_dvnode->fid.unique);
+
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot,
+			   ntohl(de->u.vnode), ntohl(de->u.unique), name->name);
+
+	kunmap_local(block);
+	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+	inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+
+out:
+	_leave("");
+	return;
+
+already_invalidated:
+	kunmap_local(block);
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
+			   0, 0, 0, 0, name->name);
+	goto out;
+
+error:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
+			   0, 0, 0, 0, name->name);
+	goto out;
+}
+
+/*
+ * Initialise a new directory.  We need to fill in the "." and ".." entries.
+ */
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode)
+{
+	union afs_xdr_dir_block *meta;
+	struct afs_dir_iter iter = { .dvnode = dvnode };
+	union afs_xdr_dirent *de;
+	unsigned int slot = AFS_DIR_RESV_BLOCKS0;
+	loff_t i_size;
+
+	i_size = i_size_read(&dvnode->netfs.inode);
+	if (i_size != AFS_DIR_BLOCK_SIZE) {
+		afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size);
+		return;
+	}
+
+	meta = afs_dir_get_block(&iter, 0);
+	if (!meta)
+		return;
+
+	afs_edit_init_block(meta, meta, 0);
+
+	de = &meta->dirents[slot];
+	de->u.valid  = 1;
+	de->u.vnode  = htonl(dvnode->fid.vnode);
+	de->u.unique = htonl(dvnode->fid.unique);
+	memcpy(de->u.name, ".", 2);
+	trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+			   dvnode->fid.vnode, dvnode->fid.unique, ".");
+	slot++;
+
+	de = &meta->dirents[slot];
+	de->u.valid  = 1;
+	de->u.vnode  = htonl(parent_dvnode->fid.vnode);
+	de->u.unique = htonl(parent_dvnode->fid.unique);
+	memcpy(de->u.name, "..", 3);
+	trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+			   parent_dvnode->fid.vnode, parent_dvnode->fid.unique, "..");
+
+	afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2);
+	meta->meta.alloc_ctrs[0] -= 2;
+	kunmap_local(meta);
+
+	netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
+	set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+	set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+}
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
new file mode 100644
index 000000000000..d2516e55b5ed
--- /dev/null
+++ b/fs/afs/dir_search.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Search a directory's hash table.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+/*
+ * Calculate the name hash.
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name)
+{
+	const unsigned char *p = name->name;
+	unsigned int hash = 0, i;
+	int bucket;
+
+	for (i = 0; i < name->len; i++)
+		hash = (hash * 173) + p[i];
+	bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1);
+	if (hash > INT_MAX) {
+		bucket = AFS_DIR_HASHTBL_SIZE - bucket;
+		bucket &= (AFS_DIR_HASHTBL_SIZE - 1);
+	}
+	return bucket;
+}
+
+/*
+ * Reset a directory iterator.
+ */
+static bool afs_dir_reset_iter(struct afs_dir_iter *iter)
+{
+	unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode);
+	unsigned int nblocks;
+
+	/* Work out the maximum number of steps we can take. */
+	nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS);
+	if (!nblocks)
+		return false;
+	iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS);
+	iter->prev_entry = 0; /* Hash head is previous */
+	return true;
+}
+
+/*
+ * Initialise a directory iterator for looking up a name.
+ */
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name)
+{
+	iter->nr_slots = afs_dir_calc_slots(name->len);
+	iter->bucket = afs_dir_hash_name(name);
+	return afs_dir_reset_iter(iter);
+}
+
+/*
+ * Get a specific block.
+ */
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block)
+{
+	struct folio_queue *fq = iter->fq;
+	struct afs_vnode *dvnode = iter->dvnode;
+	struct folio *folio;
+	size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+	size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+	int slot = iter->fq_slot;
+
+	_enter("%zx,%d", block, slot);
+
+	if (iter->block) {
+		kunmap_local(iter->block);
+		iter->block = NULL;
+	}
+
+	if (dvnode->directory_size < blend)
+		goto fail;
+
+	if (!fq || blpos < fpos) {
+		fq = dvnode->directory;
+		slot = 0;
+		fpos = 0;
+	}
+
+	/* Search the folio queue for the folio containing the block... */
+	for (; fq; fq = fq->next) {
+		for (; slot < folioq_count(fq); slot++) {
+			size_t fsize = folioq_folio_size(fq, slot);
+
+			if (blend <= fpos + fsize) {
+				/* ... and then return the mapped block. */
+				folio = folioq_folio(fq, slot);
+				if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+					goto fail;
+				iter->fq = fq;
+				iter->fq_slot = slot;
+				iter->fpos = fpos;
+				iter->block = kmap_local_folio(folio, blpos - fpos);
+				return iter->block;
+			}
+			fpos += fsize;
+		}
+		slot = 0;
+	}
+
+fail:
+	iter->fq = NULL;
+	iter->fq_slot = 0;
+	afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+	return NULL;
+}
+
+/*
+ * Search through a directory bucket.
+ */
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+			  struct afs_fid *_fid)
+{
+	const union afs_xdr_dir_block *meta;
+	unsigned int entry;
+	int ret = -ESTALE;
+
+	meta = afs_dir_find_block(iter, 0);
+	if (!meta)
+		return -ESTALE;
+
+	entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]);
+	_enter("%x,%x", iter->bucket, entry);
+
+	while (entry) {
+		const union afs_xdr_dir_block *block;
+		const union afs_xdr_dirent *dire;
+		unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK;
+		unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
+		unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+
+		_debug("search %x", entry);
+
+		if (slot < resv) {
+			kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x",
+			       iter->bucket, resv, slot, slot + iter->nr_slots - 1);
+			goto bad;
+		}
+
+		block = afs_dir_find_block(iter, blnum);
+		if (!block)
+			goto bad;
+		dire = &block->dirents[slot];
+
+		if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK &&
+		    memcmp(dire->u.name, name->name, name->len) == 0 &&
+		    dire->u.name[name->len] == '\0') {
+			_fid->vnode  = ntohl(dire->u.vnode);
+			_fid->unique = ntohl(dire->u.unique);
+			ret = entry;
+			goto found;
+		}
+
+		iter->prev_entry = entry;
+		entry = ntohs(dire->u.hash_next);
+		if (!--iter->loop_check) {
+			kdebug("dir chain loop h=%x", iter->bucket);
+			goto bad;
+		}
+	}
+
+	ret = -ENOENT;
+found:
+	if (iter->block) {
+		kunmap_local(iter->block);
+		iter->block = NULL;
+	}
+
+bad:
+	if (ret == -ESTALE)
+		afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Search the appropriate hash chain in the contents of an AFS directory.
+ */
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
+		   struct afs_fid *_fid, afs_dataversion_t *_dir_version)
+{
+	struct afs_dir_iter iter = { .dvnode = dvnode, };
+	int ret, retry_limit = 3;
+
+	_enter("{%lu},,,", dvnode->netfs.inode.i_ino);
+
+	if (!afs_dir_init_iter(&iter, name))
+		return -ENOENT;
+	do {
+		if (--retry_limit < 0) {
+			pr_warn("afs_read_dir(): Too many retries\n");
+			ret = -ESTALE;
+			break;
+		}
+		ret = afs_read_dir(dvnode, NULL);
+		if (ret < 0) {
+			if (ret != -ESTALE)
+				break;
+			if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
+				ret = -ESTALE;
+				break;
+			}
+			continue;
+		}
+		*_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode);
+
+		ret = afs_dir_search_bucket(&iter, name, _fid);
+		up_read(&dvnode->validate_lock);
+		if (ret == -ESTALE)
+			afs_dir_reset_iter(&iter);
+	} while (ret == -ESTALE);
+
+	_leave(" = %d", ret);
+	return ret;
+}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000000..014495d4b868
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+static void afs_silly_rename_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+
+	afs_check_dir_conflict(op, &op->file[0]);
+	afs_vnode_commit_status(op, &op->file[0]);
+}
+
+static void afs_silly_rename_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+	struct dentry *old = op->dentry;
+	struct dentry *new = op->dentry_2;
+
+	spin_lock(&old->d_lock);
+	old->d_flags |= DCACHE_NFSFS_RENAMED;
+	spin_unlock(&old->d_lock);
+	if (dvnode->silly_key != op->key) {
+		key_put(dvnode->silly_key);
+		dvnode->silly_key = key_get(op->key);
+	}
+
+	down_write(&dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) {
+		afs_edit_dir_remove(dvnode, &old->d_name,
+				    afs_edit_dir_for_silly_0);
+		afs_edit_dir_add(dvnode, &new->d_name,
+				 &vnode->fid, afs_edit_dir_for_silly_1);
+	}
+	up_write(&dvnode->validate_lock);
+}
+
+static const struct afs_operation_ops afs_silly_rename_operation = {
+	.issue_afs_rpc	= afs_fs_rename,
+	.issue_yfs_rpc	= yfs_fs_rename,
+	.success	= afs_silly_rename_success,
+	.edit_dir	= afs_silly_rename_edit_dir,
+};
+
+/*
+ * Actually perform the silly rename step.
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *old, struct dentry *new,
+			       struct key *key)
+{
+	struct afs_operation *op;
+
+	_enter("%pd,%pd", old, new);
+
+	op = afs_alloc_operation(key, dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	op->more_files = kvcalloc(2, sizeof(struct afs_vnode_param), GFP_KERNEL);
+	if (!op->more_files) {
+		afs_put_operation(op);
+		return -ENOMEM;
+	}
+
+	afs_op_set_vnode(op, 0, dvnode);
+	afs_op_set_vnode(op, 1, dvnode);
+	op->file[0].dv_delta = 1;
+	op->file[1].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[1].modification = true;
+	op->file[0].update_ctime = true;
+	op->file[1].update_ctime = true;
+	op->more_files[0].vnode		= AFS_FS_I(d_inode(old));
+	op->more_files[0].speculative	= true;
+	op->more_files[1].vnode		= AFS_FS_I(d_inode(new));
+	op->more_files[1].speculative	= true;
+	op->nr_files = 4;
+
+	op->dentry		= old;
+	op->dentry_2		= new;
+	op->ops			= &afs_silly_rename_operation;
+
+	trace_afs_silly_rename(vnode, false);
+	return afs_do_sync_operation(op);
+}
+
+/*
+ * Perform silly-rename of a dentry.
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open.  To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename".  That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+		    struct dentry *dentry, struct key *key)
+{
+	static unsigned int sillycounter;
+	struct dentry *sdentry = NULL;
+	unsigned char silly[16];
+	int ret = -EBUSY;
+
+	_enter("");
+
+	/* We don't allow a dentry to be silly-renamed twice. */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EBUSY;
+
+	sdentry = NULL;
+	do {
+		dput(sdentry);
+		sillycounter++;
+
+		/* Create a silly name.  Note that the ".__afs" prefix is
+		 * understood by the salvager and must not be changed.
+		 */
+		scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+		sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
+
+		/* N.B. Better to return EBUSY here ... it could be dangerous
+		 * to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
+	} while (!d_is_negative(sdentry));
+
+	ihold(&vnode->netfs.inode);
+
+	ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+	switch (ret) {
+	case 0:
+		/* The rename succeeded. */
+		set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags);
+		d_move(dentry, sdentry);
+		break;
+	case -ERESTARTSYS:
+		/* The result of the rename is unknown. Play it safe by forcing
+		 * a new lookup.
+		 */
+		d_drop(dentry);
+		d_drop(sdentry);
+	}
+
+	iput(&vnode->netfs.inode);
+	dput(sdentry);
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+static void afs_silly_unlink_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	afs_check_dir_conflict(op, &op->file[0]);
+	afs_vnode_commit_status(op, &op->file[0]);
+	afs_vnode_commit_status(op, &op->file[1]);
+	afs_update_dentry_version(op, &op->file[0], op->dentry);
+}
+
+static void afs_silly_unlink_edit_dir(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode *dvnode = dvp->vnode;
+
+	_enter("op=%08x", op->debug_id);
+	down_write(&dvnode->validate_lock);
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+	    dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
+		afs_edit_dir_remove(dvnode, &op->dentry->d_name,
+				    afs_edit_dir_for_unlink);
+	up_write(&dvnode->validate_lock);
+}
+
+static const struct afs_operation_ops afs_silly_unlink_operation = {
+	.issue_afs_rpc	= afs_fs_remove_file,
+	.issue_yfs_rpc	= yfs_fs_remove_file,
+	.success	= afs_silly_unlink_success,
+	.aborted	= afs_check_for_remote_deletion,
+	.edit_dir	= afs_silly_unlink_edit_dir,
+};
+
+/*
+ * Tell the server to remove a sillyrename file.
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *dentry, struct key *key)
+{
+	struct afs_operation *op;
+
+	_enter("");
+
+	op = afs_alloc_operation(NULL, dvnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, dvnode);
+	afs_op_set_vnode(op, 1, vnode);
+	op->file[0].dv_delta = 1;
+	op->file[0].modification = true;
+	op->file[0].update_ctime = true;
+	op->file[1].op_unlinked = true;
+	op->file[1].update_ctime = true;
+
+	op->dentry	= dentry;
+	op->ops		= &afs_silly_unlink_operation;
+
+	trace_afs_silly_rename(vnode, true);
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	/* If there was a conflict with a third party, check the status of the
+	 * unlinked vnode.
+	 */
+	if (op->cumul_error.error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+		op->file[1].update_ctime = false;
+		op->fetch_status.which = 1;
+		op->ops = &afs_fetch_status_operation;
+		afs_begin_vnode_operation(op);
+		afs_wait_for_operation(op);
+	}
+
+	return afs_put_operation(op);
+}
+
+/*
+ * Remove sillyrename file on iput.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct dentry *alias;
+	int ret;
+
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+	down_read(&dvnode->rmdir_lock);
+
+	alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+	if (IS_ERR(alias)) {
+		up_read(&dvnode->rmdir_lock);
+		return 0;
+	}
+
+	if (!d_in_lookup(alias)) {
+		/* We raced with lookup...  See if we need to transfer the
+		 * sillyrename information to the aliased dentry.
+		 */
+		ret = 0;
+		spin_lock(&alias->d_lock);
+		if (d_really_is_positive(alias) &&
+		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_flags |= DCACHE_NFSFS_RENAMED;
+			ret = 1;
+		}
+		spin_unlock(&alias->d_lock);
+		up_read(&dvnode->rmdir_lock);
+		dput(alias);
+		return ret;
+	}
+
+	/* Stop lock-release from complaining. */
+	spin_lock(&vnode->lock);
+	vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+	trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+	spin_unlock(&vnode->lock);
+
+	afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);
+	up_read(&dvnode->rmdir_lock);
+	d_lookup_done(alias);
+	dput(alias);
+	return 1;
+}
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
new file mode 100644
index 000000000000..aa56e8951e03
--- /dev/null
+++ b/fs/afs/dynroot.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS dynamic root handling
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/dns_resolver.h>
+#include "internal.h"
+
+#define AFS_MIN_DYNROOT_CELL_INO 4 /* Allow for ., .., @cell, .@cell */
+#define AFS_MAX_DYNROOT_CELL_INO ((unsigned int)INT_MAX)
+
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino);
+
+/*
+ * iget5() comparator for inode created by autocell operations
+ */
+static int afs_iget5_pseudo_test(struct inode *inode, void *opaque)
+{
+	struct afs_fid *fid = opaque;
+
+	return inode->i_ino == fid->vnode;
+}
+
+/*
+ * iget5() inode initialiser
+ */
+static int afs_iget5_pseudo_set(struct inode *inode, void *opaque)
+{
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_fid *fid = opaque;
+
+	vnode->volume		= as->volume;
+	vnode->fid		= *fid;
+	inode->i_ino		= fid->vnode;
+	inode->i_generation	= fid->unique;
+	return 0;
+}
+
+/*
+ * Create an inode for an autocell dynamic automount dir.
+ */
+static struct inode *afs_iget_pseudo_dir(struct super_block *sb, ino_t ino)
+{
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	struct afs_fid fid = { .vnode = ino, .unique = 1, };
+
+	_enter("");
+
+	inode = iget5_locked(sb, fid.vnode,
+			     afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
+	       inode, inode->i_ino, fid.vid, fid.vnode, fid.unique);
+
+	vnode = AFS_FS_I(inode);
+
+	if (inode_state_read_once(inode) & I_NEW) {
+		netfs_inode_init(&vnode->netfs, NULL, false);
+		simple_inode_init_ts(inode);
+		set_nlink(inode, 2);
+		inode->i_size		= 0;
+		inode->i_mode		= S_IFDIR | 0555;
+		inode->i_op		= &afs_autocell_inode_operations;
+		inode->i_uid		= GLOBAL_ROOT_UID;
+		inode->i_gid		= GLOBAL_ROOT_GID;
+		inode->i_blocks		= 0;
+		inode->i_generation	= 0;
+		inode->i_flags		|= S_AUTOMOUNT | S_NOATIME;
+
+		set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+
+		unlock_new_inode(inode);
+	}
+	_leave(" = %p", inode);
+	return inode;
+}
+
+/*
+ * Try to automount the mountpoint with pseudo directory, if the autocell
+ * option is set.
+ */
+static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry *dentry,
+					      unsigned int flags)
+{
+	struct afs_cell *cell = NULL;
+	struct afs_net *net = afs_d2net(dentry);
+	struct inode *inode = NULL;
+	const char *name = dentry->d_name.name;
+	size_t len = dentry->d_name.len;
+	bool dotted = false;
+	int ret = -ENOENT;
+
+	/* Names prefixed with a dot are R/W mounts. */
+	if (name[0] == '.') {
+		name++;
+		len--;
+		dotted = true;
+	}
+
+	cell = afs_lookup_cell(net, name, len, NULL,
+			       AFS_LOOKUP_CELL_DYNROOT,
+			       afs_cell_trace_use_lookup_dynroot);
+	if (IS_ERR(cell)) {
+		ret = PTR_ERR(cell);
+		goto out_no_cell;
+	}
+
+	inode = afs_iget_pseudo_dir(dir->i_sb, cell->dynroot_ino * 2 + dotted);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	dentry->d_fsdata = cell;
+	return d_splice_alias(inode, dentry);
+
+out:
+	afs_unuse_cell(cell, afs_cell_trace_unuse_lookup_dynroot);
+out_no_cell:
+	if (!inode)
+		return d_splice_alias(inode, dentry);
+	return ret == -ENOENT ? NULL : ERR_PTR(ret);
+}
+
+/*
+ * Look up an entry in a dynroot directory.
+ */
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags)
+{
+	_enter("%pd", dentry);
+
+	if (flags & LOOKUP_CREATE)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (dentry->d_name.len >= AFSNAMEMAX) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	if (dentry->d_name.len == 5 &&
+	    memcmp(dentry->d_name.name, "@cell", 5) == 0)
+		return afs_lookup_atcell(dir, dentry, 2);
+
+	if (dentry->d_name.len == 6 &&
+	    memcmp(dentry->d_name.name, ".@cell", 6) == 0)
+		return afs_lookup_atcell(dir, dentry, 3);
+
+	return afs_dynroot_lookup_cell(dir, dentry, flags);
+}
+
+const struct inode_operations afs_dynroot_inode_operations = {
+	.lookup		= afs_dynroot_lookup,
+};
+
+static void afs_dynroot_d_release(struct dentry *dentry)
+{
+	struct afs_cell *cell = dentry->d_fsdata;
+
+	afs_unuse_cell(cell, afs_cell_trace_unuse_dynroot_mntpt);
+}
+
+/*
+ * Keep @cell symlink dentries around, but only keep cell autodirs when they're
+ * being used.
+ */
+static int afs_dynroot_delete_dentry(const struct dentry *dentry)
+{
+	const struct qstr *name = &dentry->d_name;
+
+	if (name->len == 5 && memcmp(name->name, "@cell", 5) == 0)
+		return 0;
+	if (name->len == 6 && memcmp(name->name, ".@cell", 6) == 0)
+		return 0;
+	return 1;
+}
+
+const struct dentry_operations afs_dynroot_dentry_operations = {
+	.d_delete	= afs_dynroot_delete_dentry,
+	.d_release	= afs_dynroot_d_release,
+	.d_automount	= afs_d_automount,
+};
+
+static void afs_atcell_delayed_put_cell(void *arg)
+{
+	struct afs_cell *cell = arg;
+
+	afs_put_cell(cell, afs_cell_trace_put_atcell);
+}
+
+/*
+ * Read @cell or .@cell symlinks.
+ */
+static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inode,
+				       struct delayed_call *done)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_cell *cell;
+	struct afs_net *net = afs_i2net(inode);
+	const char *name;
+	bool dotted = vnode->fid.vnode == 3;
+
+	if (!rcu_access_pointer(net->ws_cell))
+		return ERR_PTR(-ENOENT);
+
+	if (!dentry) {
+		/* We're in RCU-pathwalk. */
+		cell = rcu_dereference(net->ws_cell);
+		if (dotted)
+			name = cell->name - 1;
+		else
+			name = cell->name;
+		/* Shouldn't need to set a delayed call. */
+		return name;
+	}
+
+	down_read(&net->cells_lock);
+
+	cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
+	if (dotted)
+		name = cell->name - 1;
+	else
+		name = cell->name;
+	afs_get_cell(cell, afs_cell_trace_get_atcell);
+	set_delayed_call(done, afs_atcell_delayed_put_cell, cell);
+
+	up_read(&net->cells_lock);
+	return name;
+}
+
+static const struct inode_operations afs_atcell_inode_operations = {
+	.get_link	= afs_atcell_get_link,
+};
+
+/*
+ * Create an inode for the @cell or .@cell symlinks.
+ */
+static struct dentry *afs_lookup_atcell(struct inode *dir, struct dentry *dentry, ino_t ino)
+{
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	struct afs_fid fid = { .vnode = ino, .unique = 1, };
+
+	inode = iget5_locked(dir->i_sb, fid.vnode,
+			     afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	vnode = AFS_FS_I(inode);
+
+	if (inode_state_read_once(inode) & I_NEW) {
+		netfs_inode_init(&vnode->netfs, NULL, false);
+		simple_inode_init_ts(inode);
+		set_nlink(inode, 1);
+		inode->i_size		= 0;
+		inode->i_mode		= S_IFLNK | 0555;
+		inode->i_op		= &afs_atcell_inode_operations;
+		inode->i_uid		= GLOBAL_ROOT_UID;
+		inode->i_gid		= GLOBAL_ROOT_GID;
+		inode->i_blocks		= 0;
+		inode->i_generation	= 0;
+		inode->i_flags		|= S_NOATIME;
+
+		unlock_new_inode(inode);
+	}
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * Transcribe the cell database into readdir content under the RCU read lock.
+ * Each cell produces two entries, one prefixed with a dot and one not.
+ */
+static int afs_dynroot_readdir_cells(struct afs_net *net, struct dir_context *ctx)
+{
+	const struct afs_cell *cell;
+	loff_t newpos;
+
+	_enter("%llu", ctx->pos);
+
+	for (;;) {
+		unsigned int ix = ctx->pos >> 1;
+
+		cell = idr_get_next(&net->cells_dyn_ino, &ix);
+		if (!cell)
+			return 0;
+		if (READ_ONCE(cell->state) == AFS_CELL_REMOVING ||
+		    READ_ONCE(cell->state) == AFS_CELL_DEAD) {
+			ctx->pos += 2;
+			ctx->pos &= ~1;
+			continue;
+		}
+
+		newpos = ix << 1;
+		if (newpos > ctx->pos)
+			ctx->pos = newpos;
+
+		_debug("pos %llu -> cell %u", ctx->pos, cell->dynroot_ino);
+
+		if ((ctx->pos & 1) == 0) {
+			if (!dir_emit(ctx, cell->name, cell->name_len,
+				      cell->dynroot_ino, DT_DIR))
+				return 0;
+			ctx->pos++;
+		}
+		if ((ctx->pos & 1) == 1) {
+			if (!dir_emit(ctx, cell->name - 1, cell->name_len + 1,
+				      cell->dynroot_ino + 1, DT_DIR))
+				return 0;
+			ctx->pos++;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Read the AFS dynamic root directory.  This produces a list of cellnames,
+ * dotted and undotted, along with @cell and .@cell links if configured.
+ */
+static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct afs_net *net = afs_d2net(file->f_path.dentry);
+	int ret = 0;
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	if (ctx->pos == 2) {
+		if (rcu_access_pointer(net->ws_cell) &&
+		    !dir_emit(ctx, "@cell", 5, 2, DT_LNK))
+			return 0;
+		ctx->pos = 3;
+	}
+	if (ctx->pos == 3) {
+		if (rcu_access_pointer(net->ws_cell) &&
+		    !dir_emit(ctx, ".@cell", 6, 3, DT_LNK))
+			return 0;
+		ctx->pos = 4;
+	}
+
+	if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) {
+		down_read(&net->cells_lock);
+		ret = afs_dynroot_readdir_cells(net, ctx);
+		up_read(&net->cells_lock);
+	}
+	return ret;
+}
+
+static const struct file_operations afs_dynroot_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= afs_dynroot_readdir,
+	.fsync		= noop_fsync,
+};
+
+/*
+ * Create an inode for a dynamic root directory.
+ */
+struct inode *afs_dynroot_iget_root(struct super_block *sb)
+{
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	struct afs_fid fid = { .vid = 0, .vnode = 1, .unique = 1,};
+
+	if (as->volume)
+		fid.vid = as->volume->vid;
+
+	inode = iget5_locked(sb, fid.vnode,
+			     afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	vnode = AFS_FS_I(inode);
+
+	/* there shouldn't be an existing inode */
+	if (inode_state_read_once(inode) & I_NEW) {
+		netfs_inode_init(&vnode->netfs, NULL, false);
+		simple_inode_init_ts(inode);
+		set_nlink(inode, 2);
+		inode->i_size		= 0;
+		inode->i_mode		= S_IFDIR | 0555;
+		inode->i_op		= &afs_dynroot_inode_operations;
+		inode->i_fop		= &afs_dynroot_file_operations;
+		inode->i_uid		= GLOBAL_ROOT_UID;
+		inode->i_gid		= GLOBAL_ROOT_GID;
+		inode->i_blocks		= 0;
+		inode->i_generation	= 0;
+		inode->i_flags		|= S_NOATIME;
+
+		set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
+		unlock_new_inode(inode);
+	}
+	_leave(" = %p", inode);
+	return inode;
+}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 66d50fe2ee45..f66a92294284 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS filesystem file handling
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -16,27 +12,32 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/gfp.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/netfs.h>
+#include <trace/events/netfs.h>
 #include "internal.h"
 
-static int afs_readpage(struct file *file, struct page *page);
-static void afs_invalidatepage(struct page *page, unsigned int offset,
-			       unsigned int length);
-static int afs_releasepage(struct page *page, gfp_t gfp_flags);
-static int afs_launder_page(struct page *page);
+static int afs_file_mmap_prepare(struct vm_area_desc *desc);
 
-static int afs_readpages(struct file *filp, struct address_space *mapping,
-			 struct list_head *pages, unsigned nr_pages);
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
+				    struct pipe_inode_info *pipe,
+				    size_t len, unsigned int flags);
+static void afs_vm_open(struct vm_area_struct *area);
+static void afs_vm_close(struct vm_area_struct *area);
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 const struct file_operations afs_file_operations = {
 	.open		= afs_open,
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= generic_file_aio_read,
-	.aio_write	= afs_file_write,
-	.mmap		= generic_file_readonly_mmap,
-	.splice_read	= generic_file_splice_read,
+	.read_iter	= afs_file_read_iter,
+	.write_iter	= netfs_file_write_iter,
+	.mmap_prepare	= afs_file_mmap_prepare,
+	.splice_read	= afs_file_splice_read,
+	.splice_write	= iter_file_splice_write,
 	.fsync		= afs_fsync,
 	.lock		= afs_lock,
 	.flock		= afs_flock,
@@ -48,45 +49,120 @@ const struct inode_operations afs_file_inode_operations = {
 	.permission	= afs_permission,
 };
 
-const struct address_space_operations afs_fs_aops = {
-	.readpage	= afs_readpage,
-	.readpages	= afs_readpages,
-	.set_page_dirty	= afs_set_page_dirty,
-	.launder_page	= afs_launder_page,
-	.releasepage	= afs_releasepage,
-	.invalidatepage	= afs_invalidatepage,
-	.write_begin	= afs_write_begin,
-	.write_end	= afs_write_end,
-	.writepage	= afs_writepage,
+const struct address_space_operations afs_file_aops = {
+	.direct_IO	= noop_direct_IO,
+	.read_folio	= netfs_read_folio,
+	.readahead	= netfs_readahead,
+	.dirty_folio	= netfs_dirty_folio,
+	.release_folio	= netfs_release_folio,
+	.invalidate_folio = netfs_invalidate_folio,
+	.migrate_folio	= filemap_migrate_folio,
 	.writepages	= afs_writepages,
 };
 
+static const struct vm_operations_struct afs_vm_ops = {
+	.open		= afs_vm_open,
+	.close		= afs_vm_close,
+	.fault		= filemap_fault,
+	.map_pages	= afs_vm_map_pages,
+	.page_mkwrite	= afs_page_mkwrite,
+};
+
+/*
+ * Discard a pin on a writeback key.
+ */
+void afs_put_wb_key(struct afs_wb_key *wbk)
+{
+	if (wbk && refcount_dec_and_test(&wbk->usage)) {
+		key_put(wbk->key);
+		kfree(wbk);
+	}
+}
+
+/*
+ * Cache key for writeback.
+ */
+int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af)
+{
+	struct afs_wb_key *wbk, *p;
+
+	wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL);
+	if (!wbk)
+		return -ENOMEM;
+	refcount_set(&wbk->usage, 2);
+	wbk->key = af->key;
+
+	spin_lock(&vnode->wb_lock);
+	list_for_each_entry(p, &vnode->wb_keys, vnode_link) {
+		if (p->key == wbk->key)
+			goto found;
+	}
+
+	key_get(wbk->key);
+	list_add_tail(&wbk->vnode_link, &vnode->wb_keys);
+	spin_unlock(&vnode->wb_lock);
+	af->wb = wbk;
+	return 0;
+
+found:
+	refcount_inc(&p->usage);
+	spin_unlock(&vnode->wb_lock);
+	af->wb = p;
+	kfree(wbk);
+	return 0;
+}
+
 /*
  * open an AFS file or directory and attach a key to it
  */
 int afs_open(struct inode *inode, struct file *file)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af;
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		return PTR_ERR(key);
+		ret = PTR_ERR(key);
+		goto error;
 	}
 
+	af = kzalloc(sizeof(*af), GFP_KERNEL);
+	if (!af) {
+		ret = -ENOMEM;
+		goto error_key;
+	}
+	af->key = key;
+
 	ret = afs_validate(vnode, key);
-	if (ret < 0) {
-		_leave(" = %d [val]", ret);
-		return ret;
+	if (ret < 0)
+		goto error_af;
+
+	if (file->f_mode & FMODE_WRITE) {
+		ret = afs_cache_wb_key(vnode, af);
+		if (ret < 0)
+			goto error_af;
 	}
 
-	file->private_data = key;
+	if (file->f_flags & O_TRUNC)
+		set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+
+	fscache_use_cookie(afs_vnode_cache(vnode), file->f_mode & FMODE_WRITE);
+
+	file->private_data = af;
 	_leave(" = 0");
 	return 0;
+
+error_af:
+	kfree(af);
+error_key:
+	key_put(key);
+error:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
@@ -94,287 +170,397 @@ int afs_open(struct inode *inode, struct file *file)
  */
 int afs_release(struct inode *inode, struct file *file)
 {
+	struct afs_vnode_cache_aux aux;
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af = file->private_data;
+	loff_t i_size;
+	int ret = 0;
 
-	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
-	key_put(file->private_data);
-	_leave(" = 0");
-	return 0;
+	if ((file->f_mode & FMODE_WRITE))
+		ret = vfs_fsync(file, 0);
+
+	file->private_data = NULL;
+	if (af->wb)
+		afs_put_wb_key(af->wb);
+
+	if ((file->f_mode & FMODE_WRITE)) {
+		i_size = i_size_read(&vnode->netfs.inode);
+		afs_set_cache_aux(vnode, &aux);
+		fscache_unuse_cookie(afs_vnode_cache(vnode), &aux, &i_size);
+	} else {
+		fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
+	}
+
+	key_put(af->key);
+	kfree(af);
+	afs_prune_wb_keys(vnode);
+	_leave(" = %d", ret);
+	return ret;
 }
 
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * deal with notification that a page was read from the cache
- */
-static void afs_file_readpage_read_complete(struct page *page,
-					    void *data,
-					    int error)
+static void afs_fetch_data_notify(struct afs_operation *op)
 {
-	_enter("%p,%p,%d", page, data, error);
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
 
-	/* if the read completes with an error, we just unlock the page and let
-	 * the VM reissue the readpage */
-	if (!error)
-		SetPageUptodate(page);
-	unlock_page(page);
+	subreq->error = afs_op_error(op);
+	netfs_read_subreq_terminated(subreq);
 }
-#endif
 
-/*
- * read page from file, directory or symlink, given a key to use
- */
-int afs_page_filler(void *data, struct page *page)
+static void afs_fetch_data_success(struct afs_operation *op)
 {
-	struct inode *inode = page->mapping->host;
-	struct afs_vnode *vnode = AFS_FS_I(inode);
-	struct key *key = data;
-	size_t len;
-	off_t offset;
-	int ret;
+	struct afs_vnode *vnode = op->file[0].vnode;
 
-	_enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
+	_enter("op=%08x", op->debug_id);
+	afs_vnode_commit_status(op, &op->file[0]);
+	afs_stat_v(vnode, n_fetches);
+	atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes);
+	afs_fetch_data_notify(op);
+}
 
-	BUG_ON(!PageLocked(page));
+static void afs_fetch_data_aborted(struct afs_operation *op)
+{
+	afs_check_for_remote_deletion(op);
+	afs_fetch_data_notify(op);
+}
 
-	ret = -ESTALE;
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-		goto error;
+const struct afs_operation_ops afs_fetch_data_operation = {
+	.issue_afs_rpc	= afs_fs_fetch_data,
+	.issue_yfs_rpc	= yfs_fs_fetch_data,
+	.success	= afs_fetch_data_success,
+	.aborted	= afs_fetch_data_aborted,
+	.failed		= afs_fetch_data_notify,
+};
 
-	/* is it cached? */
-#ifdef CONFIG_AFS_FSCACHE
-	ret = fscache_read_or_alloc_page(vnode->cache,
-					 page,
-					 afs_file_readpage_read_complete,
-					 NULL,
-					 GFP_KERNEL);
-#else
-	ret = -ENOBUFS;
-#endif
-	switch (ret) {
-		/* read BIO submitted (page in cache) */
-	case 0:
-		break;
+static void afs_issue_read_call(struct afs_operation *op)
+{
+	op->call_responded = false;
+	op->call_error = 0;
+	op->call_abort_code = 0;
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags))
+		yfs_fs_fetch_data(op);
+	else
+		afs_fs_fetch_data(op);
+}
 
-		/* page not yet cached */
-	case -ENODATA:
-		_debug("cache said ENODATA");
-		goto go_on;
+static void afs_end_read(struct afs_operation *op)
+{
+	if (op->call_responded && op->server)
+		set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+	if (!afs_op_error(op))
+		afs_fetch_data_success(op);
+	else if (op->cumul_error.aborted)
+		afs_fetch_data_aborted(op);
+	else
+		afs_fetch_data_notify(op);
+
+	afs_end_vnode_operation(op);
+	afs_put_operation(op);
+}
 
-		/* page will not be cached */
-	case -ENOBUFS:
-		_debug("cache said ENOBUFS");
-	default:
-	go_on:
-		offset = page->index << PAGE_CACHE_SHIFT;
-		len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
-
-		/* read the contents of the file from the server into the
-		 * page */
-		ret = afs_vnode_fetch_data(vnode, key, offset, len, page);
-		if (ret < 0) {
-			if (ret == -ENOENT) {
-				_debug("got NOENT from server"
-				       " - marking file deleted and stale");
-				set_bit(AFS_VNODE_DELETED, &vnode->flags);
-				ret = -ESTALE;
-			}
-
-#ifdef CONFIG_AFS_FSCACHE
-			fscache_uncache_page(vnode->cache, page);
-#endif
-			BUG_ON(PageFsCache(page));
-			goto error;
-		}
+/*
+ * Perform I/O processing on an asynchronous call.  The work item carries a ref
+ * to the call struct that we either need to release or to pass on.
+ */
+static void afs_read_receive(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	enum afs_call_state state;
 
-		SetPageUptodate(page);
+	_enter("");
 
-		/* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
-		if (PageFsCache(page) &&
-		    fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
-			fscache_uncache_page(vnode->cache, page);
-			BUG_ON(PageFsCache(page));
-		}
-#endif
-		unlock_page(page);
+	state = READ_ONCE(call->state);
+	if (state == AFS_CALL_COMPLETE)
+		return;
+	trace_afs_read_recv(op, call);
+
+	while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) {
+		WRITE_ONCE(call->need_attention, false);
+		afs_deliver_to_call(call);
+		state = READ_ONCE(call->state);
 	}
 
-	_leave(" = 0");
-	return 0;
+	if (state < AFS_CALL_COMPLETE) {
+		netfs_read_subreq_progress(op->fetch.subreq);
+		if (rxrpc_kernel_check_life(call->net->socket, call->rxcall))
+			return;
+		/* rxrpc terminated the call. */
+		afs_set_call_complete(call, call->error, call->abort_code);
+	}
 
-error:
-	SetPageError(page);
-	unlock_page(page);
-	_leave(" = %d", ret);
-	return ret;
+	op->call_abort_code	= call->abort_code;
+	op->call_error		= call->error;
+	op->call_responded	= call->responded;
+	op->call		= NULL;
+	call->op		= NULL;
+	afs_put_call(call);
+
+	/* If the call failed, then we need to crank the server rotation
+	 * handle and try the next.
+	 */
+	if (afs_select_fileserver(op)) {
+		afs_issue_read_call(op);
+		return;
+	}
+
+	afs_end_read(op);
 }
 
-/*
- * read page from file, directory or symlink, given a file to nominate the key
- * to be used
- */
-static int afs_readpage(struct file *file, struct page *page)
+void afs_fetch_data_async_rx(struct work_struct *work)
 {
-	struct key *key;
-	int ret;
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
 
-	if (file) {
-		key = file->private_data;
-		ASSERT(key != NULL);
-		ret = afs_page_filler(key, page);
-	} else {
-		struct inode *inode = page->mapping->host;
-		key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
-		if (IS_ERR(key)) {
-			ret = PTR_ERR(key);
-		} else {
-			ret = afs_page_filler(key, page);
-			key_put(key);
-		}
+	afs_read_receive(call);
+	afs_put_call(call);
+}
+
+void afs_fetch_data_immediate_cancel(struct afs_call *call)
+{
+	if (call->async) {
+		afs_get_call(call, afs_call_trace_wake);
+		if (!queue_work(afs_async_calls, &call->async_work))
+			afs_deferred_put_call(call);
+		flush_work(&call->async_work);
 	}
-	return ret;
 }
 
 /*
- * read a set of pages
+ * Fetch file data from the volume.
  */
-static int afs_readpages(struct file *file, struct address_space *mapping,
-			 struct list_head *pages, unsigned nr_pages)
+static void afs_issue_read(struct netfs_io_subrequest *subreq)
 {
-	struct key *key = file->private_data;
-	struct afs_vnode *vnode;
-	int ret = 0;
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+	struct key *key = subreq->rreq->netfs_priv;
+
+	_enter("%s{%llx:%llu.%u},%x,,,",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op)) {
+		subreq->error = PTR_ERR(op);
+		netfs_read_subreq_terminated(subreq);
+		return;
+	}
 
-	_enter("{%d},{%lu},,%d",
-	       key_serial(key), mapping->host->i_ino, nr_pages);
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->fetch.subreq = subreq;
+	op->ops		= &afs_fetch_data_operation;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+
+	if (subreq->rreq->origin == NETFS_READAHEAD ||
+	    subreq->rreq->iocb) {
+		op->flags |= AFS_OPERATION_ASYNC;
+
+		if (!afs_begin_vnode_operation(op)) {
+			subreq->error = afs_put_operation(op);
+			netfs_read_subreq_terminated(subreq);
+			return;
+		}
 
-	ASSERT(key != NULL);
+		if (!afs_select_fileserver(op)) {
+			afs_end_read(op);
+			return;
+		}
 
-	vnode = AFS_FS_I(mapping->host);
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_leave(" = -ESTALE");
-		return -ESTALE;
+		afs_issue_read_call(op);
+	} else {
+		afs_do_sync_operation(op);
 	}
+}
 
-	/* attempt to read as many of the pages as possible */
-#ifdef CONFIG_AFS_FSCACHE
-	ret = fscache_read_or_alloc_pages(vnode->cache,
-					  mapping,
-					  pages,
-					  &nr_pages,
-					  afs_file_readpage_read_complete,
-					  NULL,
-					  mapping_gfp_mask(mapping));
-#else
-	ret = -ENOBUFS;
-#endif
-
-	switch (ret) {
-		/* all pages are being read from the cache */
-	case 0:
-		BUG_ON(!list_empty(pages));
-		BUG_ON(nr_pages != 0);
-		_leave(" = 0 [reading all]");
-		return 0;
-
-		/* there were pages that couldn't be read from the cache */
-	case -ENODATA:
-	case -ENOBUFS:
-		break;
+static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
+{
+	struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
 
-		/* other error */
+	if (file)
+		rreq->netfs_priv = key_get(afs_file_key(file));
+	rreq->rsize = 256 * 1024;
+	rreq->wsize = 256 * 1024 * 1024;
+
+	switch (rreq->origin) {
+	case NETFS_READ_SINGLE:
+		if (!file) {
+			struct key *key = afs_request_key(vnode->volume->cell);
+
+			if (IS_ERR(key))
+				return PTR_ERR(key);
+			rreq->netfs_priv = key;
+		}
+		break;
+	case NETFS_WRITEBACK:
+	case NETFS_WRITETHROUGH:
+	case NETFS_UNBUFFERED_WRITE:
+	case NETFS_DIO_WRITE:
+		if (S_ISREG(rreq->inode->i_mode))
+			rreq->io_streams[0].avail = true;
+		break;
+	case NETFS_WRITEBACK_SINGLE:
 	default:
-		_leave(" = %d", ret);
-		return ret;
+		break;
 	}
+	return 0;
+}
 
-	/* load the missing pages from the network */
-	ret = read_cache_pages(mapping, pages, afs_page_filler, key);
+static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
+				 struct folio **foliop, void **_fsdata)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
-	_leave(" = %d [netting]", ret);
-	return ret;
+	return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
 }
 
-/*
- * write back a dirty page
- */
-static int afs_launder_page(struct page *page)
+static void afs_free_request(struct netfs_io_request *rreq)
 {
-	_enter("{%lu}", page->index);
+	key_put(rreq->netfs_priv);
+	afs_put_wb_key(rreq->netfs_priv2);
+}
 
-	return 0;
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	loff_t i_size;
+
+	write_seqlock(&vnode->cb_lock);
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (new_i_size > i_size) {
+		i_size_write(&vnode->netfs.inode, new_i_size);
+		inode_set_bytes(&vnode->netfs.inode, new_i_size);
+	}
+	write_sequnlock(&vnode->cb_lock);
+	fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
 }
 
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- *   the entire page)
- */
-static void afs_invalidatepage(struct page *page, unsigned int offset,
-			       unsigned int length)
+static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
 {
-	struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
+	struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
 
-	_enter("{%lu},%u,%u", page->index, offset, length);
+	afs_invalidate_cache(vnode, 0);
+}
 
-	BUG_ON(!PageLocked(page));
+const struct netfs_request_ops afs_req_ops = {
+	.init_request		= afs_init_request,
+	.free_request		= afs_free_request,
+	.check_write_begin	= afs_check_write_begin,
+	.issue_read		= afs_issue_read,
+	.update_i_size		= afs_update_i_size,
+	.invalidate_cache	= afs_netfs_invalidate_cache,
+	.begin_writeback	= afs_begin_writeback,
+	.prepare_write		= afs_prepare_write,
+	.issue_write		= afs_issue_write,
+	.retry_request		= afs_retry_request,
+};
 
-	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == PAGE_CACHE_SIZE) {
-#ifdef CONFIG_AFS_FSCACHE
-		if (PageFsCache(page)) {
-			struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
-			fscache_wait_on_page_write(vnode->cache, page);
-			fscache_uncache_page(vnode->cache, page);
-		}
-#endif
+static void afs_add_open_mmap(struct afs_vnode *vnode)
+{
+	if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
+		down_write(&vnode->volume->open_mmaps_lock);
 
-		if (PagePrivate(page)) {
-			if (wb && !PageWriteback(page)) {
-				set_page_private(page, 0);
-				afs_put_writeback(wb);
-			}
+		if (list_empty(&vnode->cb_mmap_link))
+			list_add_tail(&vnode->cb_mmap_link, &vnode->volume->open_mmaps);
 
-			if (!page_private(page))
-				ClearPagePrivate(page);
-		}
+		up_write(&vnode->volume->open_mmaps_lock);
 	}
+}
+
+static void afs_drop_open_mmap(struct afs_vnode *vnode)
+{
+	if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1))
+		return;
 
-	_leave("");
+	down_write(&vnode->volume->open_mmaps_lock);
+
+	read_seqlock_excl(&vnode->cb_lock);
+	// the only place where ->cb_nr_mmap may hit 0
+	// see __afs_break_callback() for the other side...
+	if (atomic_dec_and_test(&vnode->cb_nr_mmap))
+		list_del_init(&vnode->cb_mmap_link);
+	read_sequnlock_excl(&vnode->cb_lock);
+
+	up_write(&vnode->volume->open_mmaps_lock);
+	flush_work(&vnode->cb_work);
 }
 
 /*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
+ * Handle setting up a memory mapping on an AFS file.
  */
-static int afs_releasepage(struct page *page, gfp_t gfp_flags)
+static int afs_file_mmap_prepare(struct vm_area_desc *desc)
 {
-	struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
-	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
-
-	_enter("{{%x:%u}[%lu],%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
-	       gfp_flags);
-
-	/* deny if page is being written to the cache and the caller hasn't
-	 * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
-	if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
-		_leave(" = F [cache busy]");
-		return 0;
-	}
-#endif
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file));
+	int ret;
 
-	if (PagePrivate(page)) {
-		if (wb) {
-			set_page_private(page, 0);
-			afs_put_writeback(wb);
-		}
-		ClearPagePrivate(page);
-	}
+	afs_add_open_mmap(vnode);
+
+	ret = generic_file_mmap_prepare(desc);
+	if (ret == 0)
+		desc->vm_ops = &afs_vm_ops;
+	else
+		afs_drop_open_mmap(vnode);
+	return ret;
+}
+
+static void afs_vm_open(struct vm_area_struct *vma)
+{
+	afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+static void afs_vm_close(struct vm_area_struct *vma)
+{
+	afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
+
+	if (afs_check_validity(vnode))
+		return filemap_map_pages(vmf, start_pgoff, end_pgoff);
+	return 0;
+}
+
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af = iocb->ki_filp->private_data;
+	ssize_t ret;
+
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return netfs_unbuffered_read_iter(iocb, iter);
 
-	/* indicate that the page can be released */
-	_leave(" = T");
-	return 1;
+	ret = netfs_start_io_read(inode);
+	if (ret < 0)
+		return ret;
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_read(iocb, iter, 0);
+	netfs_end_io_read(inode);
+	return ret;
+}
+
+static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
+				    struct pipe_inode_info *pipe,
+				    size_t len, unsigned int flags)
+{
+	struct inode *inode = file_inode(in);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_file *af = in->private_data;
+	ssize_t ret;
+
+	ret = netfs_start_io_read(inode);
+	if (ret < 0)
+		return ret;
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_splice_read(in, ppos, pipe, len, flags);
+	netfs_end_io_read(inode);
+	return ret;
 }
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index a8cf2cff836c..f0e96a35093f 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -1,68 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS file locking support
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include "internal.h"
 
 #define AFS_LOCK_GRANTED	0
 #define AFS_LOCK_PENDING	1
+#define AFS_LOCK_YOUR_TRY	2
+
+struct workqueue_struct *afs_lock_manager;
 
+static void afs_next_locker(struct afs_vnode *vnode, int error);
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
 static void afs_fl_release_private(struct file_lock *fl);
 
-static struct workqueue_struct *afs_lock_manager;
-static DEFINE_MUTEX(afs_lock_manager_mutex);
-
 static const struct file_lock_operations afs_lock_ops = {
 	.fl_copy_lock		= afs_fl_copy_lock,
 	.fl_release_private	= afs_fl_release_private,
 };
 
-/*
- * initialise the lock manager thread if it isn't already running
- */
-static int afs_init_lock_manager(void)
+static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state)
 {
-	int ret;
-
-	ret = 0;
-	if (!afs_lock_manager) {
-		mutex_lock(&afs_lock_manager_mutex);
-		if (!afs_lock_manager) {
-			afs_lock_manager =
-				create_singlethread_workqueue("kafs_lockd");
-			if (!afs_lock_manager)
-				ret = -ENOMEM;
-		}
-		mutex_unlock(&afs_lock_manager_mutex);
-	}
-	return ret;
+	_debug("STATE %u -> %u", vnode->lock_state, state);
+	vnode->lock_state = state;
 }
 
-/*
- * destroy the lock manager thread if it's running
- */
-void __exit afs_kill_lock_manager(void)
-{
-	if (afs_lock_manager)
-		destroy_workqueue(afs_lock_manager);
-}
+static atomic_t afs_file_lock_debug_id;
 
 /*
  * if the callback is broken on this vnode, then the lock may now be available
  */
 void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+	spin_lock(&vnode->lock);
+	if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+		afs_next_locker(vnode, 0);
+	trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0);
+	spin_unlock(&vnode->lock);
 }
 
 /*
@@ -71,8 +50,36 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
  */
 static void afs_schedule_lock_extension(struct afs_vnode *vnode)
 {
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work,
-			   AFS_LOCKWAIT * HZ / 2);
+	ktime_t expires_at, now, duration;
+	u64 duration_j;
+
+	expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2);
+	now = ktime_get_real();
+	duration = ktime_sub(expires_at, now);
+	if (duration <= 0)
+		duration_j = 0;
+	else
+		duration_j = nsecs_to_jiffies(ktime_to_ns(duration));
+
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j);
+}
+
+/*
+ * In the case of successful completion of a lock operation, record the time
+ * the reply appeared and start the lock extension timer.
+ */
+void afs_lock_op_done(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	if (call->error == 0) {
+		spin_lock(&vnode->lock);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
+		vnode->locked_at = call->issue_time;
+		afs_schedule_lock_extension(vnode);
+		spin_unlock(&vnode->lock);
+	}
 }
 
 /*
@@ -80,22 +87,190 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode)
  * first lock in the queue is itself a readlock)
  * - the caller must hold the vnode lock
  */
-static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
+static void afs_grant_locks(struct afs_vnode *vnode)
 {
 	struct file_lock *p, *_p;
+	bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
 
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-	if (fl->fl_type == F_RDLCK) {
-		list_for_each_entry_safe(p, _p, &vnode->pending_locks,
-					 fl_u.afs.link) {
-			if (p->fl_type == F_RDLCK) {
-				p->fl_u.afs.state = AFS_LOCK_GRANTED;
-				list_move_tail(&p->fl_u.afs.link,
-					       &vnode->granted_locks);
-				wake_up(&p->fl_wait);
-			}
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (!exclusive && lock_is_write(p))
+			continue;
+
+		list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
+		p->fl_u.afs.state = AFS_LOCK_GRANTED;
+		trace_afs_flock_op(vnode, p, afs_flock_op_grant);
+		locks_wake_up(p);
+	}
+}
+
+/*
+ * If an error is specified, reject every pending lock that matches the
+ * authentication and type of the lock we failed to get.  If there are any
+ * remaining lockers, try to wake up one of them to have a go.
+ */
+static void afs_next_locker(struct afs_vnode *vnode, int error)
+{
+	struct file_lock *p, *_p, *next = NULL;
+	struct key *key = vnode->lock_key;
+	unsigned int type = F_RDLCK;
+
+	_enter("");
+
+	if (vnode->lock_type == AFS_LOCK_WRITE)
+		type = F_WRLCK;
+
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (error &&
+		    p->c.flc_type == type &&
+		    afs_file_key(p->c.flc_file) == key) {
+			list_del_init(&p->fl_u.afs.link);
+			p->fl_u.afs.state = error;
+			locks_wake_up(p);
 		}
+
+		/* Select the next locker to hand off to. */
+		if (next && (lock_is_write(next) || lock_is_read(p)))
+			continue;
+		next = p;
+	}
+
+	vnode->lock_key = NULL;
+	key_put(key);
+
+	if (next) {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+		next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+		trace_afs_flock_op(vnode, next, afs_flock_op_wake);
+		locks_wake_up(next);
+	} else {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
 	}
+
+	_leave("");
+}
+
+/*
+ * Kill off all waiters in the the pending lock queue due to the vnode being
+ * deleted.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+	struct file_lock *p;
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+	while (!list_empty(&vnode->pending_locks)) {
+		p = list_entry(vnode->pending_locks.next,
+			       struct file_lock, fl_u.afs.link);
+		list_del_init(&p->fl_u.afs.link);
+		p->fl_u.afs.state = -ENOENT;
+		locks_wake_up(p);
+	}
+
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
+}
+
+static void afs_lock_success(struct afs_operation *op)
+{
+	_enter("op=%08x", op->debug_id);
+	afs_vnode_commit_status(op, &op->file[0]);
+}
+
+static const struct afs_operation_ops afs_set_lock_operation = {
+	.issue_afs_rpc	= afs_fs_set_lock,
+	.issue_yfs_rpc	= yfs_fs_set_lock,
+	.success	= afs_lock_success,
+	.aborted	= afs_check_for_remote_deletion,
+};
+
+/*
+ * Get a lock on a file
+ */
+static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
+			afs_lock_type_t type)
+{
+	struct afs_operation *op;
+
+	_enter("%s{%llx:%llu.%u},%x,%u",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key), type);
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->lock.type	= type;
+	op->ops		= &afs_set_lock_operation;
+	return afs_do_sync_operation(op);
+}
+
+static const struct afs_operation_ops afs_extend_lock_operation = {
+	.issue_afs_rpc	= afs_fs_extend_lock,
+	.issue_yfs_rpc	= yfs_fs_extend_lock,
+	.success	= afs_lock_success,
+};
+
+/*
+ * Extend a lock on a file
+ */
+static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_operation *op;
+
+	_enter("%s{%llx:%llu.%u},%x",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->flags	|= AFS_OPERATION_UNINTR;
+	op->ops		= &afs_extend_lock_operation;
+	return afs_do_sync_operation(op);
+}
+
+static const struct afs_operation_ops afs_release_lock_operation = {
+	.issue_afs_rpc	= afs_fs_release_lock,
+	.issue_yfs_rpc	= yfs_fs_release_lock,
+	.success	= afs_lock_success,
+};
+
+/*
+ * Release a lock on a file
+ */
+static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_operation *op;
+
+	_enter("%s{%llx:%llu.%u},%x",
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key));
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->flags	|= AFS_OPERATION_UNINTR;
+	op->ops		= &afs_release_lock_operation;
+	return afs_do_sync_operation(op);
 }
 
 /*
@@ -107,126 +282,105 @@ void afs_lock_work(struct work_struct *work)
 {
 	struct afs_vnode *vnode =
 		container_of(work, struct afs_vnode, lock_work.work);
-	struct file_lock *fl;
-	afs_lock_type_t type;
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 	spin_lock(&vnode->lock);
 
-	if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
-		_debug("unlock");
+again:
+	_debug("wstate %u for %p", vnode->lock_state, vnode);
+	switch (vnode->lock_state) {
+	case AFS_VNODE_LOCK_NEED_UNLOCK:
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0);
 		spin_unlock(&vnode->lock);
 
 		/* attempt to release the server lock; if it fails, we just
-		 * wait 5 minutes and it'll time out anyway */
-		ret = afs_vnode_release_lock(vnode, vnode->unlock_key);
-		if (ret < 0)
+		 * wait 5 minutes and it'll expire anyway */
+		ret = afs_release_lock(vnode, vnode->lock_key);
+		if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+					   ret);
 			printk(KERN_WARNING "AFS:"
-			       " Failed to release lock on {%x:%x} error %d\n",
+			       " Failed to release lock on {%llx:%llx} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
-		key_put(vnode->unlock_key);
-		vnode->unlock_key = NULL;
-		clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
-	}
+		if (ret == -ENOENT)
+			afs_kill_lockers_enoent(vnode);
+		else
+			afs_next_locker(vnode, 0);
+		spin_unlock(&vnode->lock);
+		return;
 
-	/* if we've got a lock, then it must be time to extend that lock as AFS
-	 * locks time out after 5 minutes */
-	if (!list_empty(&vnode->granted_locks)) {
+	/* If we've already got a lock, then it must be time to extend that
+	 * lock as AFS locks time out after 5 minutes.
+	 */
+	case AFS_VNODE_LOCK_GRANTED:
 		_debug("extend");
 
-		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
-			BUG();
-		fl = list_entry(vnode->granted_locks.next,
-				struct file_lock, fl_u.afs.link);
-		key = key_get(fl->fl_file->private_data);
+		ASSERT(!list_empty(&vnode->granted_locks));
+
+		key = key_get(vnode->lock_key);
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0);
 		spin_unlock(&vnode->lock);
 
-		ret = afs_vnode_extend_lock(vnode, key);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		ret = afs_extend_lock(vnode, key); /* RPC */
 		key_put(key);
-		switch (ret) {
-		case 0:
-			afs_schedule_lock_extension(vnode);
-			break;
-		default:
-			/* ummm... we failed to extend the lock - retry
-			 * extension shortly */
-			printk(KERN_WARNING "AFS:"
-			       " Failed to extend lock on {%x:%x} error %d\n",
-			       vnode->fid.vid, vnode->fid.vnode, ret);
+
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+					   ret);
+			pr_warn("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
+				vnode->fid.vid, vnode->fid.vnode, ret);
+		}
+
+		spin_lock(&vnode->lock);
+
+		if (ret == -ENOENT) {
+			afs_kill_lockers_enoent(vnode);
+			spin_unlock(&vnode->lock);
+			return;
+		}
+
+		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
+			goto again;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+
+		if (ret != 0)
 			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
 					   HZ * 10);
-			break;
-		}
-		_leave(" [extend]");
+		spin_unlock(&vnode->lock);
+		_leave(" [ext]");
 		return;
-	}
 
-	/* if we don't have a granted lock, then we must've been called back by
-	 * the server, and so if might be possible to get a lock we're
-	 * currently waiting for */
-	if (!list_empty(&vnode->pending_locks)) {
-		_debug("get");
-
-		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
-			BUG();
-		fl = list_entry(vnode->pending_locks.next,
-				struct file_lock, fl_u.afs.link);
-		key = key_get(fl->fl_file->private_data);
-		type = (fl->fl_type == F_RDLCK) ?
-			AFS_LOCK_READ : AFS_LOCK_WRITE;
+	/* If we're waiting for a callback to indicate lock release, we can't
+	 * actually rely on this, so need to recheck at regular intervals.  The
+	 * problem is that the server might not notify us if the lock just
+	 * expires (say because a client died) rather than being explicitly
+	 * released.
+	 */
+	case AFS_VNODE_LOCK_WAITING_FOR_CB:
+		_debug("retry");
+		afs_next_locker(vnode, 0);
 		spin_unlock(&vnode->lock);
+		return;
 
-		ret = afs_vnode_set_lock(vnode, key, type);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
-		switch (ret) {
-		case -EWOULDBLOCK:
-			_debug("blocked");
-			break;
-		case 0:
-			_debug("acquired");
-			if (type == AFS_LOCK_READ)
-				set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-			else
-				set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-			ret = AFS_LOCK_GRANTED;
-		default:
-			spin_lock(&vnode->lock);
-			/* the pending lock may have been withdrawn due to a
-			 * signal */
-			if (list_entry(vnode->pending_locks.next,
-				       struct file_lock, fl_u.afs.link) == fl) {
-				fl->fl_u.afs.state = ret;
-				if (ret == AFS_LOCK_GRANTED)
-					afs_grant_locks(vnode, fl);
-				else
-					list_del_init(&fl->fl_u.afs.link);
-				wake_up(&fl->fl_wait);
-				spin_unlock(&vnode->lock);
-			} else {
-				_debug("withdrawn");
-				clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-				clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-				spin_unlock(&vnode->lock);
-				afs_vnode_release_lock(vnode, key);
-				if (!list_empty(&vnode->pending_locks))
-					afs_lock_may_be_available(vnode);
-			}
-			break;
-		}
-		key_put(key);
-		_leave(" [pend]");
+	case AFS_VNODE_LOCK_DELETED:
+		afs_kill_lockers_enoent(vnode);
+		spin_unlock(&vnode->lock);
 		return;
-	}
 
-	/* looks like the lock request was withdrawn on a signal */
-	spin_unlock(&vnode->lock);
-	_leave(" [no locks]");
+	default:
+		/* Looks like a lock request was withdrawn. */
+		spin_unlock(&vnode->lock);
+		_leave(" [no]");
+		return;
+	}
 }
 
 /*
@@ -235,16 +389,60 @@ void afs_lock_work(struct work_struct *work)
  * AF_RXRPC
  * - the caller must hold the vnode lock
  */
-static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
+static void afs_defer_unlock(struct afs_vnode *vnode)
 {
-	cancel_delayed_work(&vnode->lock_work);
-	if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
-	    !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
-		BUG();
-	if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
-		BUG();
-	vnode->unlock_key = key_get(key);
-	afs_lock_may_be_available(vnode);
+	_enter("%u", vnode->lock_state);
+
+	if (list_empty(&vnode->granted_locks) &&
+	    (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
+	     vnode->lock_state == AFS_VNODE_LOCK_EXTENDING)) {
+		cancel_delayed_work(&vnode->lock_work);
+
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NEED_UNLOCK);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_defer_unlock, 0);
+		queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+	}
+}
+
+/*
+ * Check that our view of the file metadata is up to date and check to see
+ * whether we think that we have a locking permit.
+ */
+static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
+			      enum afs_flock_mode mode, afs_lock_type_t type)
+{
+	afs_access_t access;
+	int ret;
+
+	/* Make sure we've got a callback on this file and that our view of the
+	 * data version is up to date.
+	 */
+	ret = afs_validate(vnode, key);
+	if (ret < 0)
+		return ret;
+
+	/* Check the permission set to see if we're actually going to be
+	 * allowed to get a lock on this file.
+	 */
+	ret = afs_check_permit(vnode, key, &access);
+	if (ret < 0)
+		return ret;
+
+	/* At a rough estimation, you need LOCK, WRITE or INSERT perm to
+	 * read-lock a file and WRITE or INSERT perm to write-lock a file.
+	 *
+	 * We can't rely on the server to do this for us since if we want to
+	 * share a read lock that we already have, we won't go the server.
+	 */
+	if (type == AFS_LOCK_READ) {
+		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
+			return -EACCES;
+	} else {
+		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
+			return -EACCES;
+	}
+
+	return 0;
 }
 
 /*
@@ -254,185 +452,247 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
+	enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
 	afs_lock_type_t type;
-	struct key *key = file->private_data;
+	struct key *key = afs_file_key(file);
+	bool partial, no_server_lock = false;
 	int ret;
 
-	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	if (mode == afs_flock_mode_unset)
+		mode = afs_flock_mode_openafs;
 
-	/* only whole-file locks are supported */
-	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
-		return -EINVAL;
-
-	ret = afs_init_lock_manager();
-	if (ret < 0)
-		return ret;
+	_enter("{%llx:%llu},%llu-%llu,%u,%u",
+	       vnode->fid.vid, vnode->fid.vnode,
+	       fl->fl_start, fl->fl_end, fl->c.flc_type, mode);
 
 	fl->fl_ops = &afs_lock_ops;
 	INIT_LIST_HEAD(&fl->fl_u.afs.link);
 	fl->fl_u.afs.state = AFS_LOCK_PENDING;
 
-	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-
-	spin_lock(&inode->i_lock);
+	partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
+	type = lock_is_read(fl) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+	if (mode == afs_flock_mode_write && partial)
+		type = AFS_LOCK_WRITE;
 
-	/* make sure we've got a callback on this file and that our view of the
-	 * data version is up to date */
-	ret = afs_vnode_fetch_status(vnode, NULL, key);
+	ret = afs_do_setlk_check(vnode, key, mode, type);
 	if (ret < 0)
-		goto error;
+		return ret;
 
-	if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
-		ret = -EAGAIN;
-		goto error;
+	trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
+
+	/* AFS3 protocol only supports full-file locks and doesn't provide any
+	 * method of upgrade/downgrade, so we need to emulate for partial-file
+	 * locks.
+	 *
+	 * The OpenAFS client only gets a server lock for a full-file lock and
+	 * keeps partial-file locks local.  Allow this behaviour to be emulated
+	 * (as the default).
+	 */
+	if (mode == afs_flock_mode_local ||
+	    (partial && mode == afs_flock_mode_openafs)) {
+		no_server_lock = true;
+		goto skip_server_lock;
 	}
 
 	spin_lock(&vnode->lock);
+	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
 
-	/* if we've already got a readlock on the server then we can instantly
-	 * grant another readlock, irrespective of whether there are any
-	 * pending writelocks */
-	if (type == AFS_LOCK_READ &&
-	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
-		_debug("instant readlock");
-		ASSERTCMP(vnode->flags &
-			  ((1 << AFS_VNODE_LOCKING) |
-			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
-		ASSERT(!list_empty(&vnode->granted_locks));
-		goto sharing_existing_lock;
-	}
+	ret = -ENOENT;
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		goto error_unlock;
 
-	/* if there's no-one else with a lock on this vnode, then we need to
-	 * ask the server for a lock */
-	if (list_empty(&vnode->pending_locks) &&
-	    list_empty(&vnode->granted_locks)) {
-		_debug("not locked");
-		ASSERTCMP(vnode->flags &
-			  ((1 << AFS_VNODE_LOCKING) |
-			   (1 << AFS_VNODE_READLOCKED) |
-			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
-		list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
-		spin_unlock(&vnode->lock);
+	/* If we've already got a lock on the server then try to move to having
+	 * the VFS grant the requested lock.  Note that this means that other
+	 * clients may get starved out.
+	 */
+	_debug("try %u", vnode->lock_state);
+	if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED) {
+		if (type == AFS_LOCK_READ) {
+			_debug("instant readlock");
+			list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+			fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+			goto vnode_is_locked_u;
+		}
 
-		ret = afs_vnode_set_lock(vnode, key, type);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
-		switch (ret) {
-		case 0:
-			_debug("acquired");
-			goto acquired_server_lock;
-		case -EWOULDBLOCK:
-			_debug("would block");
-			spin_lock(&vnode->lock);
-			ASSERT(list_empty(&vnode->granted_locks));
-			ASSERTCMP(vnode->pending_locks.next, ==,
-				  &fl->fl_u.afs.link);
-			goto wait;
-		default:
-			spin_lock(&vnode->lock);
-			list_del_init(&fl->fl_u.afs.link);
-			spin_unlock(&vnode->lock);
-			goto error;
+		if (vnode->lock_type == AFS_LOCK_WRITE) {
+			_debug("instant writelock");
+			list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+			fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+			goto vnode_is_locked_u;
 		}
 	}
 
-	/* otherwise, we need to wait for a local lock to become available */
-	_debug("wait local");
-	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-wait:
-	if (!(fl->fl_flags & FL_SLEEP)) {
-		_debug("noblock");
+	if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
+	    !(fl->c.flc_flags & FL_SLEEP)) {
 		ret = -EAGAIN;
-		goto abort_attempt;
+		if (type == AFS_LOCK_READ) {
+			if (vnode->status.lock_count == -1)
+				goto lock_is_contended; /* Write locked */
+		} else {
+			if (vnode->status.lock_count != 0)
+				goto lock_is_contended; /* Locked */
+		}
 	}
-	spin_unlock(&vnode->lock);
 
-	/* now we need to sleep and wait for the lock manager thread to get the
-	 * lock from the server */
-	_debug("sleep");
-	ret = wait_event_interruptible(fl->fl_wait,
-				       fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
-	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
-		ret = fl->fl_u.afs.state;
-		if (ret < 0)
-			goto error;
-		spin_lock(&vnode->lock);
-		goto given_lock;
-	}
+	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
+		goto need_to_wait;
+
+try_to_lock:
+	/* We don't have a lock on this vnode and we aren't currently waiting
+	 * for one either, so ask the server for a lock.
+	 *
+	 * Note that we need to be careful if we get interrupted by a signal
+	 * after dispatching the request as we may still get the lock, even
+	 * though we don't wait for the reply (it's not too bad a problem - the
+	 * lock will expire in 5 mins anyway).
+	 */
+	trace_afs_flock_ev(vnode, fl, afs_flock_try_to_lock, 0);
+	vnode->lock_key = key_get(key);
+	vnode->lock_type = type;
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+	spin_unlock(&vnode->lock);
 
-	/* we were interrupted, but someone may still be in the throes of
-	 * giving us the lock */
-	_debug("intr");
-	ASSERTCMP(ret, ==, -ERESTARTSYS);
+	ret = afs_set_lock(vnode, key, type); /* RPC */
 
 	spin_lock(&vnode->lock);
-	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
-		ret = fl->fl_u.afs.state;
-		if (ret < 0) {
-			spin_unlock(&vnode->lock);
-			goto error;
-		}
-		goto given_lock;
-	}
+	switch (ret) {
+	case -EKEYREJECTED:
+	case -EKEYEXPIRED:
+	case -EKEYREVOKED:
+	case -EPERM:
+	case -EACCES:
+		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_perm, ret);
+		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, ret);
+		goto error_unlock;
 
-abort_attempt:
-	/* we aren't going to get the lock, either because we're unwilling to
-	 * wait, or because some signal happened */
-	_debug("abort");
-	if (list_empty(&vnode->granted_locks) &&
-	    vnode->pending_locks.next == &fl->fl_u.afs.link) {
-		if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
-			/* kick the next pending lock into having a go */
-			list_del_init(&fl->fl_u.afs.link);
-			afs_lock_may_be_available(vnode);
-		}
-	} else {
+	case -ENOENT:
+		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
+		list_del_init(&fl->fl_u.afs.link);
+		afs_kill_lockers_enoent(vnode);
+		goto error_unlock;
+
+	default:
+		fl->fl_u.afs.state = ret;
+		trace_afs_flock_ev(vnode, fl, afs_flock_fail_other, ret);
 		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, 0);
+		goto error_unlock;
+
+	case -EWOULDBLOCK:
+		/* The server doesn't have a lock-waiting queue, so the client
+		 * will have to retry.  The server will break the outstanding
+		 * callbacks on a file when a lock is released.
+		 */
+		ASSERT(list_empty(&vnode->granted_locks));
+		ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
+		goto lock_is_contended;
+
+	case 0:
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
+		trace_afs_flock_ev(vnode, fl, afs_flock_acquired, type);
+		afs_grant_locks(vnode);
+		goto vnode_is_locked_u;
 	}
-	spin_unlock(&vnode->lock);
-	goto error;
 
-acquired_server_lock:
-	/* we've acquired a server lock, but it needs to be renewed after 5
-	 * mins */
-	spin_lock(&vnode->lock);
-	afs_schedule_lock_extension(vnode);
-	if (type == AFS_LOCK_READ)
-		set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-	else
-		set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-sharing_existing_lock:
-	/* the lock has been granted as far as we're concerned... */
-	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-given_lock:
-	/* ... but we do still need to get the VFS's blessing */
-	ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
-	ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
-				(1 << AFS_VNODE_WRITELOCKED))) != 0);
-	ret = posix_lock_file(file, fl, NULL);
+vnode_is_locked_u:
+	spin_unlock(&vnode->lock);
+vnode_is_locked:
+	/* the lock has been granted by the server... */
+	ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
+
+skip_server_lock:
+	/* ... but the VFS still needs to distribute access on this client. */
+	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
+	ret = locks_lock_file_wait(file, fl);
+	trace_afs_flock_ev(vnode, fl, afs_flock_vfs_lock, ret);
 	if (ret < 0)
 		goto vfs_rejected_lock;
-	spin_unlock(&vnode->lock);
 
-	/* again, make sure we've got a callback on this file and, again, make
+	/* Again, make sure we've got a callback on this file and, again, make
 	 * sure that our view of the data version is up to date (we ignore
-	 * errors incurred here and deal with the consequences elsewhere) */
-	afs_vnode_fetch_status(vnode, NULL, key);
+	 * errors incurred here and deal with the consequences elsewhere).
+	 */
+	afs_validate(vnode, key);
+	_leave(" = 0");
+	return 0;
 
-error:
-	spin_unlock(&inode->i_lock);
-	_leave(" = %d", ret);
-	return ret;
+lock_is_contended:
+	if (!(fl->c.flc_flags & FL_SLEEP)) {
+		list_del_init(&fl->fl_u.afs.link);
+		afs_next_locker(vnode, 0);
+		ret = -EAGAIN;
+		goto error_unlock;
+	}
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_WAITING_FOR_CB);
+	trace_afs_flock_ev(vnode, fl, afs_flock_would_block, ret);
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 5);
+
+need_to_wait:
+	/* We're going to have to wait.  Either this client doesn't have a lock
+	 * on the server yet and we need to wait for a callback to occur, or
+	 * the client does have a lock on the server, but it's shared and we
+	 * need an exclusive lock.
+	 */
+	spin_unlock(&vnode->lock);
+
+	trace_afs_flock_ev(vnode, fl, afs_flock_waiting, 0);
+	ret = wait_event_interruptible(fl->c.flc_wait,
+				       fl->fl_u.afs.state != AFS_LOCK_PENDING);
+	trace_afs_flock_ev(vnode, fl, afs_flock_waited, ret);
+
+	if (fl->fl_u.afs.state >= 0 && fl->fl_u.afs.state != AFS_LOCK_GRANTED) {
+		spin_lock(&vnode->lock);
+
+		switch (fl->fl_u.afs.state) {
+		case AFS_LOCK_YOUR_TRY:
+			fl->fl_u.afs.state = AFS_LOCK_PENDING;
+			goto try_to_lock;
+		case AFS_LOCK_PENDING:
+			if (ret > 0) {
+				/* We need to retry the lock.  We may not be
+				 * notified by the server if it just expired
+				 * rather than being released.
+				 */
+				ASSERTCMP(vnode->lock_state, ==, AFS_VNODE_LOCK_WAITING_FOR_CB);
+				afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+				fl->fl_u.afs.state = AFS_LOCK_PENDING;
+				goto try_to_lock;
+			}
+			goto error_unlock;
+		case AFS_LOCK_GRANTED:
+		default:
+			break;
+		}
+
+		spin_unlock(&vnode->lock);
+	}
+
+	if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
+		goto vnode_is_locked;
+	ret = fl->fl_u.afs.state;
+	goto error;
 
 vfs_rejected_lock:
-	/* the VFS rejected the lock we just obtained, so we have to discard
-	 * what we just got */
+	/* The VFS rejected the lock we just obtained, so we have to discard
+	 * what we just got.  We defer this to the lock manager work item to
+	 * deal with.
+	 */
 	_debug("vfs refused %d", ret);
+	if (no_server_lock)
+		goto error;
+	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
-	if (list_empty(&vnode->granted_locks))
-		afs_defer_unlock(vnode, key);
-	goto abort_attempt;
+	afs_defer_unlock(vnode);
+
+error_unlock:
+	spin_unlock(&vnode->lock);
+error:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
@@ -440,34 +700,20 @@ vfs_rejected_lock:
  */
 static int afs_do_unlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
-	struct key *key = file->private_data;
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	int ret;
 
-	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode,
+	       fl->c.flc_type);
 
-	/* only whole-file unlocks are supported */
-	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
-		return -EINVAL;
+	trace_afs_flock_op(vnode, fl, afs_flock_op_unlock);
 
-	fl->fl_ops = &afs_lock_ops;
-	INIT_LIST_HEAD(&fl->fl_u.afs.link);
-	fl->fl_u.afs.state = AFS_LOCK_PENDING;
+	/* Flush all pending writes before doing anything with locks. */
+	vfs_fsync(file, 0);
 
-	spin_lock(&vnode->lock);
-	ret = posix_lock_file(file, fl, NULL);
-	if (ret < 0) {
-		spin_unlock(&vnode->lock);
-		_leave(" = %d [vfs]", ret);
-		return ret;
-	}
-
-	/* discard the server lock only if all granted locks are gone */
-	if (list_empty(&vnode->granted_locks))
-		afs_defer_unlock(vnode, key);
-	spin_unlock(&vnode->lock);
-	_leave(" = 0");
-	return 0;
+	ret = locks_lock_file_wait(file, fl);
+	_leave(" = %d [%u]", ret, vnode->lock_state);
+	return ret;
 }
 
 /*
@@ -475,38 +721,40 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
  */
 static int afs_do_getlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
-	struct key *key = file->private_data;
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct key *key = afs_file_key(file);
 	int ret, lock_count;
 
 	_enter("");
 
-	fl->fl_type = F_UNLCK;
+	if (vnode->lock_state == AFS_VNODE_LOCK_DELETED)
+		return -ENOENT;
 
-	mutex_lock(&vnode->vfs_inode.i_mutex);
+	fl->c.flc_type = F_UNLCK;
 
 	/* check local lock records first */
-	ret = 0;
 	posix_test_lock(file, fl);
-	if (fl->fl_type == F_UNLCK) {
+	if (lock_is_unlock(fl)) {
 		/* no local locks; consult the server */
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		ret = afs_fetch_status(vnode, key, false, NULL);
 		if (ret < 0)
 			goto error;
-		lock_count = vnode->status.lock_count;
-		if (lock_count) {
+
+		lock_count = READ_ONCE(vnode->status.lock_count);
+		if (lock_count != 0) {
 			if (lock_count > 0)
-				fl->fl_type = F_RDLCK;
+				fl->c.flc_type = F_RDLCK;
 			else
-				fl->fl_type = F_WRLCK;
+				fl->c.flc_type = F_WRLCK;
 			fl->fl_start = 0;
 			fl->fl_end = OFFSET_MAX;
+			fl->c.flc_pid = 0;
 		}
 	}
 
+	ret = 0;
 error:
-	mutex_unlock(&vnode->vfs_inode.i_mutex);
-	_leave(" = %d [%hd]", ret, fl->fl_type);
+	_leave(" = %d [%hd]", ret, fl->c.flc_type);
 	return ret;
 }
 
@@ -516,21 +764,33 @@ error:
 int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	enum afs_flock_operation op;
+	int ret;
 
-	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
-	       fl->fl_type, fl->fl_flags,
+	       fl->c.flc_type, fl->c.flc_flags,
 	       (long long) fl->fl_start, (long long) fl->fl_end);
 
-	/* AFS doesn't support mandatory locks */
-	if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK)
-		return -ENOLCK;
-
 	if (IS_GETLK(cmd))
 		return afs_do_getlk(file, fl);
-	if (fl->fl_type == F_UNLCK)
-		return afs_do_unlk(file, fl);
-	return afs_do_setlk(file, fl);
+
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_lock);
+
+	if (lock_is_unlock(fl))
+		ret = afs_do_unlk(file, fl);
+	else
+		ret = afs_do_setlk(file, fl);
+
+	switch (ret) {
+	case 0:		op = afs_flock_op_return_ok; break;
+	case -EAGAIN:	op = afs_flock_op_return_eagain; break;
+	case -EDEADLK:	op = afs_flock_op_return_edeadlk; break;
+	default:	op = afs_flock_op_return_error; break;
+	}
+	trace_afs_flock_op(vnode, fl, op);
+	return ret;
 }
 
 /*
@@ -539,10 +799,12 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	enum afs_flock_operation op;
+	int ret;
 
-	_enter("{%x:%u},%d,{t=%x,fl=%x}",
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
-	       fl->fl_type, fl->fl_flags);
+	       fl->c.flc_type, fl->c.flc_flags);
 
 	/*
 	 * No BSD flocks over NFS allowed.
@@ -551,17 +813,26 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 	 * Not sure whether that would be unique, though, or whether
 	 * that would break in other places.
 	 */
-	if (!(fl->fl_flags & FL_FLOCK))
+	if (!(fl->c.flc_flags & FL_FLOCK))
 		return -ENOLCK;
 
+	fl->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+	trace_afs_flock_op(vnode, fl, afs_flock_op_flock);
+
 	/* we're simulating flock() locks using posix locks on the server */
-	fl->fl_owner = (fl_owner_t) file;
-	fl->fl_start = 0;
-	fl->fl_end = OFFSET_MAX;
+	if (lock_is_unlock(fl))
+		ret = afs_do_unlk(file, fl);
+	else
+		ret = afs_do_setlk(file, fl);
 
-	if (fl->fl_type == F_UNLCK)
-		return afs_do_unlk(file, fl);
-	return afs_do_setlk(file, fl);
+	switch (ret) {
+	case 0:		op = afs_flock_op_return_ok; break;
+	case -EAGAIN:	op = afs_flock_op_return_eagain; break;
+	case -EDEADLK:	op = afs_flock_op_return_edeadlk; break;
+	default:	op = afs_flock_op_return_error; break;
+	}
+	trace_afs_flock_op(vnode, fl, op);
+	return ret;
 }
 
 /*
@@ -572,9 +843,16 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
  */
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
+
 	_enter("");
 
+	new->fl_u.afs.debug_id = atomic_inc_return(&afs_file_lock_debug_id);
+
+	spin_lock(&vnode->lock);
+	trace_afs_flock_op(vnode, new, afs_flock_op_copy_lock);
 	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
+	spin_unlock(&vnode->lock);
 }
 
 /*
@@ -583,7 +861,17 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
  */
 static void afs_fl_release_private(struct file_lock *fl)
 {
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(fl->c.flc_file));
+
 	_enter("");
 
+	spin_lock(&vnode->lock);
+
+	trace_afs_flock_op(vnode, fl, afs_flock_op_release_lock);
 	list_del_init(&fl->fl_u.afs.link);
+	if (list_empty(&vnode->granted_locks))
+		afs_defer_unlock(vnode);
+
+	_debug("state %u for %p", vnode->lock_state, vnode);
+	spin_unlock(&vnode->lock);
 }
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
new file mode 100644
index 000000000000..8418813ee043
--- /dev/null
+++ b/fs/afs/fs_operation.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Fileserver-directed operation handling.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include "internal.h"
+
+static atomic_t afs_operation_debug_counter;
+
+/*
+ * Create an operation against a volume.
+ */
+struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *volume)
+{
+	struct afs_operation *op;
+
+	_enter("");
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return ERR_PTR(-ENOMEM);
+
+	if (!key) {
+		key = afs_request_key(volume->cell);
+		if (IS_ERR(key)) {
+			kfree(op);
+			return ERR_CAST(key);
+		}
+	} else {
+		key_get(key);
+	}
+
+	op->key			= key;
+	op->volume		= afs_get_volume(volume, afs_volume_trace_get_new_op);
+	op->net			= volume->cell->net;
+	op->cb_v_break		= atomic_read(&volume->cb_v_break);
+	op->pre_volsync.creation = volume->creation_time;
+	op->pre_volsync.update	= volume->update_time;
+	op->debug_id		= atomic_inc_return(&afs_operation_debug_counter);
+	op->nr_iterations	= -1;
+	afs_op_set_error(op, -EDESTADDRREQ);
+
+	_leave(" = [op=%08x]", op->debug_id);
+	return op;
+}
+
+struct afs_io_locker {
+	struct list_head	link;
+	struct task_struct	*task;
+	unsigned long		have_lock;
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+	struct afs_io_locker *locker;
+
+	spin_lock(&vnode->lock);
+	locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+					  struct afs_io_locker, link);
+	if (locker) {
+		list_del(&locker->link);
+		smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */
+		smp_mb__after_atomic(); /* Store have_lock before task state */
+		wake_up_process(locker->task);
+	} else {
+		clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+	}
+	spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly.  We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+	struct afs_io_locker myself = { .task = current, };
+
+	spin_lock(&vnode->lock);
+
+	if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+		spin_unlock(&vnode->lock);
+		return;
+	}
+
+	list_add_tail(&myself.link, &vnode->io_lock_waiters);
+	spin_unlock(&vnode->lock);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly.  We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+	struct afs_io_locker myself = { .task = current, };
+	int ret = 0;
+
+	spin_lock(&vnode->lock);
+
+	if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+		spin_unlock(&vnode->lock);
+		return 0;
+	}
+
+	list_add_tail(&myself.link, &vnode->io_lock_waiters);
+	spin_unlock(&vnode->lock);
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */
+		    signal_pending(current))
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	/* If we got a signal, try to transfer the lock onto the next
+	 * waiter.
+	 */
+	if (unlikely(signal_pending(current))) {
+		spin_lock(&vnode->lock);
+		if (myself.have_lock) {
+			spin_unlock(&vnode->lock);
+			afs_unlock_for_io(vnode);
+		} else {
+			list_del(&myself.link);
+			spin_unlock(&vnode->lock);
+		}
+		ret = -ERESTARTSYS;
+	}
+	return ret;
+}
+
+/*
+ * Lock the vnode(s) being operated upon.
+ */
+static bool afs_get_io_locks(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+	struct afs_vnode *vnode2 = op->file[1].vnode;
+
+	_enter("");
+
+	if (op->flags & AFS_OPERATION_UNINTR) {
+		afs_lock_for_io(vnode);
+		op->flags |= AFS_OPERATION_LOCK_0;
+		_leave(" = t [1]");
+		return true;
+	}
+
+	if (!vnode2 || !op->file[1].need_io_lock || vnode == vnode2)
+		vnode2 = NULL;
+
+	if (vnode2 > vnode)
+		swap(vnode, vnode2);
+
+	if (afs_lock_for_io_interruptible(vnode) < 0) {
+		afs_op_set_error(op, -ERESTARTSYS);
+		op->flags |= AFS_OPERATION_STOP;
+		_leave(" = f [I 0]");
+		return false;
+	}
+	op->flags |= AFS_OPERATION_LOCK_0;
+
+	if (vnode2) {
+		if (afs_lock_for_io_interruptible(vnode2) < 0) {
+			afs_op_set_error(op, -ERESTARTSYS);
+			op->flags |= AFS_OPERATION_STOP;
+			afs_unlock_for_io(vnode);
+			op->flags &= ~AFS_OPERATION_LOCK_0;
+			_leave(" = f [I 1]");
+			return false;
+		}
+		op->flags |= AFS_OPERATION_LOCK_1;
+	}
+
+	_leave(" = t [2]");
+	return true;
+}
+
+static void afs_drop_io_locks(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+	struct afs_vnode *vnode2 = op->file[1].vnode;
+
+	_enter("");
+
+	if (op->flags & AFS_OPERATION_LOCK_1)
+		afs_unlock_for_io(vnode2);
+	if (op->flags & AFS_OPERATION_LOCK_0)
+		afs_unlock_for_io(vnode);
+}
+
+static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
+			      unsigned int index)
+{
+	struct afs_vnode *vnode = vp->vnode;
+
+	if (vnode) {
+		vp->fid			= vnode->fid;
+		vp->dv_before		= vnode->status.data_version;
+		vp->cb_break_before	= afs_calc_vnode_cb_break(vnode);
+		if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
+			op->flags	|= AFS_OPERATION_CUR_ONLY;
+		if (vp->modification)
+			set_bit(AFS_VNODE_MODIFYING, &vnode->flags);
+	}
+
+	if (vp->fid.vnode)
+		_debug("PREP[%u] {%llx:%llu.%u}",
+		       index, vp->fid.vid, vp->fid.vnode, vp->fid.unique);
+}
+
+/*
+ * Begin an operation on the fileserver.
+ *
+ * Fileserver operations are serialised on the server by vnode, so we serialise
+ * them here also using the io_lock.
+ */
+bool afs_begin_vnode_operation(struct afs_operation *op)
+{
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	ASSERT(vnode);
+
+	_enter("");
+
+	if (op->file[0].need_io_lock)
+		if (!afs_get_io_locks(op))
+			return false;
+
+	afs_prepare_vnode(op, &op->file[0], 0);
+	afs_prepare_vnode(op, &op->file[1], 1);
+	op->cb_v_break = atomic_read(&op->volume->cb_v_break);
+	_leave(" = true");
+	return true;
+}
+
+/*
+ * Tidy up a filesystem cursor and unlock the vnode.
+ */
+void afs_end_vnode_operation(struct afs_operation *op)
+{
+	_enter("");
+
+	switch (afs_op_error(op)) {
+	case -EDESTADDRREQ:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+		afs_dump_edestaddrreq(op);
+		break;
+	}
+
+	afs_drop_io_locks(op);
+}
+
+/*
+ * Wait for an in-progress operation to complete.
+ */
+void afs_wait_for_operation(struct afs_operation *op)
+{
+	_enter("");
+
+	while (afs_select_fileserver(op)) {
+		op->call_responded = false;
+		op->call_error = 0;
+		op->call_abort_code = 0;
+		if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
+		    op->ops->issue_yfs_rpc)
+			op->ops->issue_yfs_rpc(op);
+		else if (op->ops->issue_afs_rpc)
+			op->ops->issue_afs_rpc(op);
+		else
+			op->call_error = -ENOTSUPP;
+
+		if (op->call) {
+			afs_wait_for_call_to_complete(op->call);
+			op->call_abort_code = op->call->abort_code;
+			op->call_error = op->call->error;
+			op->call_responded = op->call->responded;
+			afs_put_call(op->call);
+		}
+	}
+
+	if (op->call_responded && op->server)
+		set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+	if (!afs_op_error(op)) {
+		_debug("success");
+		op->ops->success(op);
+	} else if (op->cumul_error.aborted) {
+		if (op->ops->aborted)
+			op->ops->aborted(op);
+	} else {
+		if (op->ops->failed)
+			op->ops->failed(op);
+	}
+
+	afs_end_vnode_operation(op);
+
+	if (!afs_op_error(op) && op->ops->edit_dir) {
+		_debug("edit_dir");
+		op->ops->edit_dir(op);
+	}
+	_leave("");
+}
+
+/*
+ * Dispose of an operation.
+ */
+int afs_put_operation(struct afs_operation *op)
+{
+	struct afs_addr_list *alist;
+	int i, ret = afs_op_error(op);
+
+	_enter("op=%08x,%d", op->debug_id, ret);
+
+	if (op->ops && op->ops->put)
+		op->ops->put(op);
+	if (op->file[0].modification)
+		clear_bit(AFS_VNODE_MODIFYING, &op->file[0].vnode->flags);
+	if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode)
+		clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags);
+	if (op->file[0].put_vnode)
+		iput(&op->file[0].vnode->netfs.inode);
+	if (op->file[1].put_vnode)
+		iput(&op->file[1].vnode->netfs.inode);
+
+	if (op->more_files) {
+		for (i = 0; i < op->nr_files - 2; i++)
+			if (op->more_files[i].put_vnode)
+				iput(&op->more_files[i].vnode->netfs.inode);
+		kfree(op->more_files);
+	}
+
+	if (op->estate) {
+		alist = op->estate->addresses;
+		if (alist) {
+			if (op->call_responded &&
+			    op->addr_index != alist->preferred &&
+			    test_bit(alist->preferred, &op->addr_tried))
+				WRITE_ONCE(alist->preferred, op->addr_index);
+		}
+	}
+
+	afs_clear_server_states(op);
+	afs_put_serverlist(op->net, op->server_list);
+	afs_put_volume(op->volume, afs_volume_trace_put_put_op);
+	key_put(op->key);
+	kfree(op);
+	return ret;
+}
+
+int afs_do_sync_operation(struct afs_operation *op)
+{
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+	return afs_put_operation(op);
+}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 000000000000..e0030ac74ea0
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_afs.h"
+#include "protocol_yfs.h"
+
+static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
+static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+						  enum afs_estate_trace where)
+{
+	if (estate) {
+		int r;
+
+		__refcount_inc(&estate->ref, &r);
+		trace_afs_estate(estate->server_id, estate->probe_seq, r, where);
+	}
+	return estate;
+}
+
+static void afs_endpoint_state_rcu(struct rcu_head *rcu)
+{
+	struct afs_endpoint_state *estate = container_of(rcu, struct afs_endpoint_state, rcu);
+
+	trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+			 afs_estate_trace_free);
+	afs_put_addrlist(estate->addresses, afs_alist_trace_put_estate);
+	kfree(estate);
+}
+
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where)
+{
+	if (estate) {
+		unsigned int server_id = estate->server_id, probe_seq = estate->probe_seq;
+		bool dead;
+		int r;
+
+		dead = __refcount_dec_and_test(&estate->ref, &r);
+		trace_afs_estate(server_id, probe_seq, r, where);
+		if (dead)
+			call_rcu(&estate->rcu, afs_endpoint_state_rcu);
+	}
+}
+
+/*
+ * Start the probe polling timer.  We have to supply it with an inc on the
+ * outstanding server count.
+ */
+static void afs_schedule_fs_probe(struct afs_net *net,
+				  struct afs_server *server, bool fast)
+{
+	unsigned long atj;
+
+	if (!net->live)
+		return;
+
+	atj = server->probed_at;
+	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
+
+	afs_inc_servers_outstanding(net);
+	if (timer_reduce(&net->fs_probe_timer, atj))
+		afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server,
+				  struct afs_endpoint_state *estate)
+{
+	bool responded = test_bit(AFS_ESTATE_RESPONDED, &estate->flags);
+
+	write_seqlock(&net->fs_lock);
+	if (responded) {
+		list_add_tail(&server->probe_link, &net->fs_probe_slow);
+	} else {
+		server->rtt = UINT_MAX;
+		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+		list_add_tail(&server->probe_link, &net->fs_probe_fast);
+	}
+
+	write_sequnlock(&net->fs_lock);
+
+	afs_schedule_fs_probe(net, server, !responded);
+}
+
+/*
+ * Handle the completion of a probe.
+ */
+static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server,
+				  struct afs_endpoint_state *estate)
+{
+	_enter("");
+
+	if (atomic_dec_and_test(&estate->nr_probing))
+		afs_finished_fs_probe(net, server, estate);
+
+	wake_up_all(&server->probe_wq);
+}
+
+/*
+ * Handle inability to send a probe due to ENOMEM when trying to allocate a
+ * call struct.
+ */
+static void afs_fs_probe_not_done(struct afs_net *net,
+				  struct afs_server *server,
+				  struct afs_endpoint_state *estate,
+				  int index)
+{
+	_enter("");
+
+	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
+	spin_lock(&server->probe_lock);
+
+	set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
+	if (estate->error == 0)
+		estate->error = -ENOMEM;
+
+	set_bit(index, &estate->failed_set);
+
+	spin_unlock(&server->probe_lock);
+	return afs_done_one_fs_probe(net, server, estate);
+}
+
+/*
+ * Process the result of probing a fileserver.  This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+	struct afs_endpoint_state *estate = call->probe;
+	struct afs_addr_list *alist = estate->addresses;
+	struct afs_address *addr = &alist->addrs[call->probe_index];
+	struct afs_server *server = call->server;
+	unsigned int index = call->probe_index;
+	unsigned int rtt_us = -1, cap0;
+	int ret = call->error;
+
+	_enter("%pU,%u", &server->uuid, index);
+
+	WRITE_ONCE(addr->last_error, ret);
+
+	spin_lock(&server->probe_lock);
+
+	switch (ret) {
+	case 0:
+		estate->error = 0;
+		goto responded;
+	case -ECONNABORTED:
+		if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) {
+			estate->abort_code = call->abort_code;
+			estate->error = ret;
+		}
+		goto responded;
+	case -ENOMEM:
+	case -ENONET:
+		clear_bit(index, &estate->responsive_set);
+		set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
+		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
+		goto out;
+	case -ECONNRESET: /* Responded, but call expired. */
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+	default:
+		clear_bit(index, &estate->responsive_set);
+		set_bit(index, &estate->failed_set);
+		if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags) &&
+		    (estate->error == 0 ||
+		     estate->error == -ETIMEDOUT ||
+		     estate->error == -ETIME))
+			estate->error = ret;
+		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
+		goto out;
+	}
+
+responded:
+	clear_bit(index, &estate->failed_set);
+
+	if (call->service_id == YFS_FS_SERVICE) {
+		set_bit(AFS_ESTATE_IS_YFS, &estate->flags);
+		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+		server->service_id = call->service_id;
+	} else {
+		set_bit(AFS_ESTATE_NOT_YFS, &estate->flags);
+		if (!test_bit(AFS_ESTATE_IS_YFS, &estate->flags)) {
+			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+			server->service_id = call->service_id;
+		}
+		cap0 = ntohl(call->tmp);
+		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
+			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+		else
+			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+	}
+
+	rtt_us = rxrpc_kernel_get_srtt(addr->peer);
+	if (rtt_us < estate->rtt) {
+		estate->rtt = rtt_us;
+		server->rtt = rtt_us;
+		alist->preferred = index;
+	}
+
+	smp_wmb(); /* Set rtt before responded. */
+	set_bit(AFS_ESTATE_RESPONDED, &estate->flags);
+	set_bit(index, &estate->responsive_set);
+	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+out:
+	spin_unlock(&server->probe_lock);
+
+	trace_afs_fs_probe(server, false, estate, index, call->error, call->abort_code, rtt_us);
+	_debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d",
+	       estate->probe_seq, &server->uuid, index,
+	       rxrpc_kernel_remote_addr(alist->addrs[index].peer),
+	       rtt_us, ret);
+
+	return afs_done_one_fs_probe(call->net, server, estate);
+}
+
+/*
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+			    struct afs_addr_list *new_alist, struct key *key)
+{
+	struct afs_endpoint_state *estate, *old;
+	struct afs_addr_list *old_alist = NULL, *alist;
+	unsigned long unprobed;
+
+	_enter("%pU", &server->uuid);
+
+	estate = kzalloc(sizeof(*estate), GFP_KERNEL);
+	if (!estate)
+		return -ENOMEM;
+
+	refcount_set(&estate->ref, 2);
+	estate->server_id = server->debug_id;
+	estate->rtt = UINT_MAX;
+
+	write_lock(&server->fs_lock);
+
+	old = rcu_dereference_protected(server->endpoint_state,
+					lockdep_is_held(&server->fs_lock));
+	if (old) {
+		estate->responsive_set = old->responsive_set;
+		if (!new_alist)
+			new_alist = old->addresses;
+	}
+
+	if (old_alist != new_alist)
+		afs_set_peer_appdata(server, old_alist, new_alist);
+
+	estate->addresses = afs_get_addrlist(new_alist, afs_alist_trace_get_estate);
+	alist = estate->addresses;
+	estate->probe_seq = ++server->probe_counter;
+	atomic_set(&estate->nr_probing, alist->nr_addrs);
+
+	if (new_alist)
+		server->addr_version = new_alist->version;
+	rcu_assign_pointer(server->endpoint_state, estate);
+	write_unlock(&server->fs_lock);
+	if (old)
+		set_bit(AFS_ESTATE_SUPERSEDED, &old->flags);
+
+	trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
+			 afs_estate_trace_alloc_probe);
+
+	afs_get_address_preferences(net, new_alist);
+
+	server->probed_at = jiffies;
+	unprobed = (1UL << alist->nr_addrs) - 1;
+	while (unprobed) {
+		unsigned int index = 0, i;
+		int best_prio = -1;
+
+		for (i = 0; i < alist->nr_addrs; i++) {
+			if (test_bit(i, &unprobed) &&
+			    alist->addrs[i].prio > best_prio) {
+				index = i;
+				best_prio = alist->addrs[i].prio;
+			}
+		}
+		__clear_bit(index, &unprobed);
+
+		trace_afs_fs_probe(server, true, estate, index, 0, 0, 0);
+		if (!afs_fs_get_capabilities(net, server, estate, index, key))
+			afs_fs_probe_not_done(net, server, estate, index);
+	}
+
+	afs_put_endpoint_state(old, afs_estate_trace_put_probe);
+	afs_put_endpoint_state(estate, afs_estate_trace_put_probe);
+	return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond, for the probe state
+ * to be superseded or for all probes to finish.
+ */
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr)
+{
+	struct afs_endpoint_state *estate;
+	struct afs_server_list *slist = op->server_list;
+	bool still_probing = true;
+	int ret = 0, i;
+
+	_enter("%u", slist->nr_servers);
+
+	for (i = 0; i < slist->nr_servers; i++) {
+		estate = states[i].endpoint_state;
+		if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
+			return 2;
+		if (atomic_read(&estate->nr_probing))
+			still_probing = true;
+		if (estate->responsive_set & states[i].untried_addrs)
+			return 1;
+	}
+	if (!still_probing)
+		return 0;
+
+	for (i = 0; i < slist->nr_servers; i++)
+		add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
+
+	for (;;) {
+		still_probing = false;
+
+		set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		for (i = 0; i < slist->nr_servers; i++) {
+			estate = states[i].endpoint_state;
+			if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) {
+				ret = 2;
+				goto stop;
+			}
+			if (atomic_read(&estate->nr_probing))
+				still_probing = true;
+			if (estate->responsive_set & states[i].untried_addrs) {
+				ret = 1;
+				goto stop;
+			}
+		}
+
+		if (!still_probing || signal_pending(current))
+			goto stop;
+		schedule();
+	}
+
+stop:
+	set_current_state(TASK_RUNNING);
+
+	for (i = 0; i < slist->nr_servers; i++)
+		remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);
+
+	if (!ret && signal_pending(current))
+		ret = -ERESTARTSYS;
+	return ret;
+}
+
+/*
+ * Probe timer.  We have an increment on fs_outstanding that we need to pass
+ * along to the work item.
+ */
+void afs_fs_probe_timer(struct timer_list *timer)
+{
+	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
+
+	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
+		afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Dispatch a probe to a server.
+ */
+static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server)
+	__releases(&net->fs_lock)
+{
+	struct key *key = NULL;
+
+	/* We remove it from the queues here - it will be added back to
+	 * one of the queues on the completion of the probe.
+	 */
+	list_del_init(&server->probe_link);
+
+	afs_get_server(server, afs_server_trace_get_probe);
+	write_sequnlock(&net->fs_lock);
+
+	afs_fs_probe_fileserver(net, server, NULL, key);
+	afs_put_server(net, server, afs_server_trace_put_probe);
+}
+
+/*
+ * Probe a server immediately without waiting for its due time to come
+ * round.  This is used when all of the addresses have been tried.
+ */
+void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
+{
+	write_seqlock(&net->fs_lock);
+	if (!list_empty(&server->probe_link))
+		return afs_dispatch_fs_probe(net, server);
+	write_sequnlock(&net->fs_lock);
+}
+
+/*
+ * Probe dispatcher to regularly dispatch probes to keep NAT alive.
+ */
+void afs_fs_probe_dispatcher(struct work_struct *work)
+{
+	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
+	struct afs_server *fast, *slow, *server;
+	unsigned long nowj, timer_at, poll_at;
+	bool first_pass = true, set_timer = false;
+
+	if (!net->live) {
+		afs_dec_servers_outstanding(net);
+		return;
+	}
+
+	_enter("");
+
+	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
+		afs_dec_servers_outstanding(net);
+		_leave(" [none]");
+		return;
+	}
+
+again:
+	write_seqlock(&net->fs_lock);
+
+	fast = slow = server = NULL;
+	nowj = jiffies;
+	timer_at = nowj + MAX_JIFFY_OFFSET;
+
+	if (!list_empty(&net->fs_probe_fast)) {
+		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
+		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
+		if (time_before(nowj, poll_at)) {
+			timer_at = poll_at;
+			set_timer = true;
+			fast = NULL;
+		}
+	}
+
+	if (!list_empty(&net->fs_probe_slow)) {
+		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
+		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
+		if (time_before(nowj, poll_at)) {
+			if (time_before(poll_at, timer_at))
+			    timer_at = poll_at;
+			set_timer = true;
+			slow = NULL;
+		}
+	}
+
+	server = fast ?: slow;
+	if (server)
+		_debug("probe %pU", &server->uuid);
+
+	if (server && (first_pass || !need_resched())) {
+		afs_dispatch_fs_probe(net, server);
+		first_pass = false;
+		goto again;
+	}
+
+	write_sequnlock(&net->fs_lock);
+
+	if (server) {
+		if (!queue_work(afs_wq, &net->fs_prober))
+			afs_dec_servers_outstanding(net);
+		_leave(" [requeue]");
+	} else if (set_timer) {
+		if (timer_reduce(&net->fs_probe_timer, timer_at))
+			afs_dec_servers_outstanding(net);
+		_leave(" [timer]");
+	} else {
+		afs_dec_servers_outstanding(net);
+		_leave(" [quiesce]");
+	}
+}
+
+/*
+ * Wait for a probe on a particular fileserver to complete for 2s.
+ */
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+			      unsigned long exclude, bool is_intr)
+{
+	struct wait_queue_entry wait;
+	unsigned long timo = 2 * HZ;
+
+	if (atomic_read(&estate->nr_probing) == 0)
+		goto dont_wait;
+
+	init_wait_entry(&wait, 0);
+	for (;;) {
+		prepare_to_wait_event(&server->probe_wq, &wait,
+				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		if (timo == 0 ||
+		    test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) ||
+		    (estate->responsive_set & ~exclude) ||
+		    atomic_read(&estate->nr_probing) == 0 ||
+		    (is_intr && signal_pending(current)))
+			break;
+		timo = schedule_timeout(timo);
+	}
+
+	finish_wait(&server->probe_wq, &wait);
+
+dont_wait:
+	if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
+		return 0;
+	if (estate->responsive_set & ~exclude)
+		return 1;
+	if (is_intr && signal_pending(current))
+		return -ERESTARTSYS;
+	if (timo == 0)
+		return -ETIME;
+	return -EDESTADDRREQ;
+}
+
+/*
+ * Clean up the probing when the namespace is killed off.
+ */
+void afs_fs_probe_cleanup(struct afs_net *net)
+{
+	if (timer_delete_sync(&net->fs_probe_timer))
+		afs_dec_servers_outstanding(net);
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930ec2888..bc9556991d7c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1,20 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS File Server client stubs
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include <linux/netfs.h>
 #include "internal.h"
 #include "afs_fs.h"
+#include "xdr_fs.h"
 
 /*
  * decode an AFSFid block
@@ -30,127 +29,122 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
 }
 
 /*
+ * Dump a bad file status record.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 x[4];
+	int i;
+
+	pr_notice("AFS XDR: Bad status record\n");
+	for (i = 0; i < 5 * 4 * 4; i += 16) {
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	memcpy(x, bp, 4);
+	pr_notice("0x50: %08x\n", ntohl(x[0]));
+}
+
+/*
  * decode an AFSFetchStatus block
  */
 static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
-				      struct afs_file_status *status,
-				      struct afs_vnode *vnode,
-				      afs_dataversion_t *store_version)
+				      struct afs_call *call,
+				      struct afs_status_cb *scb)
 {
-	afs_dataversion_t expected_version;
-	const __be32 *bp = *_bp;
-	umode_t mode;
+	const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+	struct afs_file_status *status = &scb->status;
+	bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
 	u64 data_version, size;
-	u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
-	kuid_t owner;
-	kgid_t group;
-
-#define EXTRACT(DST)				\
-	do {					\
-		u32 x = ntohl(*bp++);		\
-		changed |= DST - x;		\
-		DST = x;			\
-	} while (0)
-
-	status->if_version = ntohl(*bp++);
-	EXTRACT(status->type);
-	EXTRACT(status->nlink);
-	size = ntohl(*bp++);
-	data_version = ntohl(*bp++);
-	EXTRACT(status->author);
-	owner = make_kuid(&init_user_ns, ntohl(*bp++));
-	changed |= !uid_eq(owner, status->owner);
-	status->owner = owner;
-	EXTRACT(status->caller_access); /* call ticket dependent */
-	EXTRACT(status->anon_access);
-	EXTRACT(status->mode);
-	EXTRACT(status->parent.vnode);
-	EXTRACT(status->parent.unique);
-	bp++; /* seg size */
-	status->mtime_client = ntohl(*bp++);
-	status->mtime_server = ntohl(*bp++);
-	group = make_kgid(&init_user_ns, ntohl(*bp++));
-	changed |= !gid_eq(group, status->group);
-	status->group = group;
-	bp++; /* sync counter */
-	data_version |= (u64) ntohl(*bp++) << 32;
-	EXTRACT(status->lock_count);
-	size |= (u64) ntohl(*bp++) << 32;
-	bp++; /* spare 4 */
-	*_bp = bp;
+	u32 type, abort_code;
+
+	abort_code = ntohl(xdr->abort_code);
+
+	if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+		if (xdr->if_version == htonl(0) &&
+		    abort_code != 0 &&
+		    inline_error) {
+			/* The OpenAFS fileserver has a bug in FS.InlineBulkStatus
+			 * whereby it doesn't set the interface version in the error
+			 * case.
+			 */
+			status->abort_code = abort_code;
+			scb->have_error = true;
+			goto advance;
+		}
 
-	if (size != status->size) {
-		status->size = size;
-		changed |= true;
+		pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
+		goto bad;
 	}
-	status->mode &= S_IALLUGO;
-
-	_debug("vnode time %lx, %lx",
-	       status->mtime_client, status->mtime_server);
-
-	if (vnode) {
-		status->parent.vid = vnode->fid.vid;
-		if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-			_debug("vnode changed");
-			i_size_write(&vnode->vfs_inode, size);
-			vnode->vfs_inode.i_uid = status->owner;
-			vnode->vfs_inode.i_gid = status->group;
-			vnode->vfs_inode.i_generation = vnode->fid.unique;
-			set_nlink(&vnode->vfs_inode, status->nlink);
-
-			mode = vnode->vfs_inode.i_mode;
-			mode &= ~S_IALLUGO;
-			mode |= status->mode;
-			barrier();
-			vnode->vfs_inode.i_mode = mode;
-		}
 
-		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_server;
-		vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
-		vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
-		vnode->vfs_inode.i_version	= data_version;
+	if (abort_code != 0 && inline_error) {
+		status->abort_code = abort_code;
+		scb->have_error = true;
+		goto advance;
 	}
 
-	expected_version = status->data_version;
-	if (store_version)
-		expected_version = *store_version;
-
-	if (expected_version != data_version) {
-		status->data_version = data_version;
-		if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-			_debug("vnode modified %llx on {%x:%u}",
-			       (unsigned long long) data_version,
-			       vnode->fid.vid, vnode->fid.vnode);
-			set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
-			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
-		}
-	} else if (store_version) {
-		status->data_version = data_version;
+	type = ntohl(xdr->type);
+	switch (type) {
+	case AFS_FTYPE_FILE:
+	case AFS_FTYPE_DIR:
+	case AFS_FTYPE_SYMLINK:
+		status->type = type;
+		break;
+	default:
+		goto bad;
 	}
+
+	status->nlink		= ntohl(xdr->nlink);
+	status->author		= ntohl(xdr->author);
+	status->owner		= ntohl(xdr->owner);
+	status->caller_access	= ntohl(xdr->caller_access); /* Ticket dependent */
+	status->anon_access	= ntohl(xdr->anon_access);
+	status->mode		= ntohl(xdr->mode) & S_IALLUGO;
+	status->group		= ntohl(xdr->group);
+	status->lock_count	= ntohl(xdr->lock_count);
+
+	status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
+	status->mtime_client.tv_nsec = 0;
+	status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
+	status->mtime_server.tv_nsec = 0;
+
+	size  = (u64)ntohl(xdr->size_lo);
+	size |= (u64)ntohl(xdr->size_hi) << 32;
+	status->size = size;
+
+	data_version  = (u64)ntohl(xdr->data_version_lo);
+	data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
+	status->data_version = data_version;
+	scb->have_status = true;
+advance:
+	*_bp = (const void *)*_bp + sizeof(*xdr);
+	return;
+
+bad:
+	xdr_dump_bad(*_bp);
+	afs_protocol_error(call, afs_eproto_bad_status);
+	goto advance;
 }
 
-/*
- * decode an AFSCallBack block
- */
-static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
+static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
 {
-	const __be32 *bp = *_bp;
-
-	vnode->cb_version	= ntohl(*bp++);
-	vnode->cb_expiry	= ntohl(*bp++);
-	vnode->cb_type		= ntohl(*bp++);
-	vnode->cb_expires	= vnode->cb_expiry + get_seconds();
-	*_bp = bp;
+	return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry;
 }
 
-static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
-				       struct afs_callback *cb)
+static void xdr_decode_AFSCallBack(const __be32 **_bp,
+				   struct afs_call *call,
+				   struct afs_status_cb *scb)
 {
+	struct afs_callback *cb = &scb->callback;
 	const __be32 *bp = *_bp;
 
-	cb->version	= ntohl(*bp++);
-	cb->expiry	= ntohl(*bp++);
-	cb->type	= ntohl(*bp++);
+	bp++; /* version */
+	cb->expires_at	= xdr_decode_expiry(call, ntohl(*bp++));
+	bp++; /* type */
+	scb->have_cb	= true;
 	*_bp = bp;
 }
 
@@ -161,14 +155,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
 				  struct afs_volsync *volsync)
 {
 	const __be32 *bp = *_bp;
+	u32 creation;
 
-	volsync->creation = ntohl(*bp++);
+	creation = ntohl(*bp++);
 	bp++; /* spare2 */
 	bp++; /* spare3 */
 	bp++; /* spare4 */
 	bp++; /* spare5 */
 	bp++; /* spare6 */
 	*_bp = bp;
+
+	if (volsync)
+		volsync->creation = creation;
 }
 
 /*
@@ -229,33 +227,30 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
 	vs->blocks_in_use	= ntohl(*bp++);
 	vs->part_blocks_avail	= ntohl(*bp++);
 	vs->part_max_blocks	= ntohl(*bp++);
+	vs->vol_copy_date	= 0;
+	vs->vol_backup_date	= 0;
 	*_bp = bp;
 }
 
 /*
  * deliver reply data to an FS.FetchStatus
  */
-static int afs_deliver_fs_fetch_status(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
 	const __be32 *bp;
+	int ret;
 
-	_enter(",,%u", last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSCallBack(&bp, vnode);
-	if (call->reply2)
-		xdr_decode_AFSVolSync(&bp, call->reply2);
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSCallBack(&bp, call, &vp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -266,162 +261,143 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSFetchStatus = {
 	.name		= "FS.FetchStatus",
+	.op		= afs_FS_FetchStatus,
 	.deliver	= afs_deliver_fs_fetch_status,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * fetch the status information for a file
  */
-int afs_fs_fetch_file_status(struct afs_server *server,
-			     struct key *key,
-			     struct afs_vnode *vnode,
-			     struct afs_volsync *volsync,
-			     const struct afs_wait_mode *wait_mode)
+void afs_fs_fetch_status(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
 	struct afs_call *call;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchStatus,
+				   16, (21 + 3 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = volsync;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	bp[0] = htonl(FSFETCHSTATUS);
-	bp[1] = htonl(vnode->fid.vid);
-	bp[2] = htonl(vnode->fid.vnode);
-	bp[3] = htonl(vnode->fid.unique);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.FetchData
  */
-static int afs_deliver_fs_fetch_data(struct afs_call *call,
-				     struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_operation *op = call->op;
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
+	struct afs_vnode_param *vp = &op->file[0];
 	const __be32 *bp;
-	struct page *page;
-	void *buffer;
+	size_t count_before;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u,%zu,%zu/%llu}",
+	       call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+	       call->remaining);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		call->remaining = 0;
 		call->unmarshall++;
-		if (call->operation_ID != FSFETCHDATA64) {
-			call->unmarshall++;
-			goto no_msw;
+		if (call->operation_ID == FSFETCHDATA64) {
+			afs_extract_to_tmp64(call);
+		} else {
+			call->tmp_u = htonl(0);
+			afs_extract_to_tmp(call);
 		}
+		fallthrough;
 
-		/* extract the upper part of the returned data length of an
-		 * FSFETCHDATA64 op (which should always be 0 using this
-		 * client) */
+		/* Extract the returned data length into ->remaining.
+		 * This may indicate more or less data than was
+		 * requested will be returned.
+		 */
 	case 1:
-		_debug("extract data length (MSW)");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		_debug("extract data length");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
-		call->count = ntohl(call->tmp);
-		_debug("DATA length MSW: %u", call->count);
-		if (call->count > 0)
-			return -EBADMSG;
-		call->offset = 0;
-		call->unmarshall++;
+		call->remaining = be64_to_cpu(call->tmp64);
+		_debug("DATA length: %llu", call->remaining);
 
-	no_msw:
-		/* extract the returned data length */
-	case 2:
-		_debug("extract data length");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (call->remaining == 0)
+			goto no_more_data;
 
-		call->count = ntohl(call->tmp);
-		_debug("DATA length: %u", call->count);
-		if (call->count > PAGE_SIZE)
-			return -EBADMSG;
-		call->offset = 0;
+		call->iter = &subreq->io_iter;
+		call->iov_len = umin(call->remaining, subreq->len - subreq->transferred);
 		call->unmarshall++;
+		fallthrough;
 
 		/* extract the returned data */
+	case 2:
+		count_before = call->iov_len;
+		_debug("extract data %zu/%llu", count_before, call->remaining);
+
+		ret = afs_extract_data(call, true);
+		subreq->transferred += count_before - call->iov_len;
+		call->remaining -= count_before - call->iov_len;
+		if (ret < 0)
+			return ret;
+
+		call->iter = &call->def_iter;
+		if (call->remaining)
+			goto no_more_data;
+
+		/* Discard any excess data the server gave us */
+		afs_extract_discard(call, call->remaining);
+		call->unmarshall = 3;
+		fallthrough;
+
 	case 3:
-		_debug("extract data");
-		if (call->count > 0) {
-			page = call->reply3;
-			buffer = kmap_atomic(page);
-			ret = afs_extract_data(call, skb, last, buffer,
-					       call->count);
-			kunmap_atomic(buffer);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
-		}
+		_debug("extract discard %zu/%llu",
+		       iov_iter_count(call->iter), call->remaining);
 
-		call->offset = 0;
-		call->unmarshall++;
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+	no_more_data:
+		call->unmarshall = 4;
+		afs_extract_to_buf(call, (21 + 3 + 6) * 4);
+		fallthrough;
 
 		/* extract the metadata */
 	case 4:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       (21 + 3 + 6) * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
-		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-		xdr_decode_AFSCallBack(&bp, vnode);
-		if (call->reply2)
-			xdr_decode_AFSVolSync(&bp, call->reply2);
+		xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+		xdr_decode_AFSCallBack(&bp, call, &vp->scb);
+		xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+		if (subreq->start + subreq->transferred >= vp->scb.status.size)
+			__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
 
-		call->offset = 0;
 		call->unmarshall++;
+		fallthrough;
 
 	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
-	if (call->count < PAGE_SIZE) {
-		_debug("clear");
-		page = call->reply3;
-		buffer = kmap_atomic(page);
-		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap_atomic(buffer);
-	}
-
 	_leave(" = 0 [done]");
 	return 0;
 }
@@ -431,394 +407,372 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSFetchData = {
 	.name		= "FS.FetchData",
+	.op		= afs_FS_FetchData,
+	.async_rx	= afs_fetch_data_async_rx,
 	.deliver	= afs_deliver_fs_fetch_data,
-	.abort_to_error	= afs_abort_to_error,
+	.immediate_cancel = afs_fetch_data_immediate_cancel,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSFetchData64 = {
 	.name		= "FS.FetchData64",
+	.op		= afs_FS_FetchData64,
+	.async_rx	= afs_fetch_data_async_rx,
 	.deliver	= afs_deliver_fs_fetch_data,
-	.abort_to_error	= afs_abort_to_error,
+	.immediate_cancel = afs_fetch_data_immediate_cancel,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * fetch data from a very large file
  */
-static int afs_fs_fetch_data64(struct afs_server *server,
-			       struct key *key,
-			       struct afs_vnode *vnode,
-			       off_t offset, size_t length,
-			       struct page *buffer,
-			       const struct afs_wait_mode *wait_mode)
+static void afs_fs_fetch_data64(struct afs_operation *op)
 {
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
 	_enter("");
 
-	ASSERTCMP(length, <, ULONG_MAX);
-
-	call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
+		return afs_op_nomem(op);
 
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = NULL; /* volsync */
-	call->reply3 = buffer;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->operation_ID = FSFETCHDATA64;
+	if (op->flags & AFS_OPERATION_ASYNC)
+		call->async = true;
 
 	/* marshall the parameters */
 	bp = call->request;
 	bp[0] = htonl(FSFETCHDATA64);
-	bp[1] = htonl(vnode->fid.vid);
-	bp[2] = htonl(vnode->fid.vnode);
-	bp[3] = htonl(vnode->fid.unique);
-	bp[4] = htonl(upper_32_bits(offset));
-	bp[5] = htonl((u32) offset);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
+	bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred));
+	bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred));
 	bp[6] = 0;
-	bp[7] = htonl((u32) length);
+	bp[7] = htonl(lower_32_bits(subreq->len   - subreq->transferred));
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * fetch data from a file
  */
-int afs_fs_fetch_data(struct afs_server *server,
-		      struct key *key,
-		      struct afs_vnode *vnode,
-		      off_t offset, size_t length,
-		      struct page *buffer,
-		      const struct afs_wait_mode *wait_mode)
+void afs_fs_fetch_data(struct afs_operation *op)
 {
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
-	if (upper_32_bits(offset) || upper_32_bits(offset + length))
-		return afs_fs_fetch_data64(server, key, vnode, offset, length,
-					   buffer, wait_mode);
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_fetch_data64(op);
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = NULL; /* volsync */
-	call->reply3 = buffer;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->operation_ID = FSFETCHDATA;
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	bp[0] = htonl(FSFETCHDATA);
-	bp[1] = htonl(vnode->fid.vid);
-	bp[2] = htonl(vnode->fid.vnode);
-	bp[3] = htonl(vnode->fid.unique);
-	bp[4] = htonl(offset);
-	bp[5] = htonl(length);
-
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
+	bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred));
+	bp[5] = htonl(lower_32_bits(subreq->len   + subreq->transferred));
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
- * deliver reply data to an FS.GiveUpCallBacks
+ * deliver reply data to an FS.CreateFile or an FS.MakeDir
  */
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
-					    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
 {
-	_enter(",{%u},%d", skb->len, last);
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	const __be32 *bp;
+	int ret;
 
-	if (skb->len > 0)
-		return -EBADMSG; /* shouldn't be any reply data */
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFid(&bp, &op->file[1].fid);
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_AFSCallBack(&bp, call, &vp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+	_leave(" = 0 [done]");
 	return 0;
 }
 
 /*
- * FS.GiveUpCallBacks operation type
+ * FS.CreateFile and FS.MakeDir operation type
  */
-static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
-	.name		= "FS.GiveUpCallBacks",
-	.deliver	= afs_deliver_fs_give_up_callbacks,
-	.abort_to_error	= afs_abort_to_error,
+static const struct afs_call_type afs_RXFSCreateFile = {
+	.name		= "FS.CreateFile",
+	.op		= afs_FS_CreateFile,
+	.deliver	= afs_deliver_fs_create_vnode,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * give up a set of callbacks
- * - the callbacks are held in the server->cb_break ring
+ * Create a file.
  */
-int afs_fs_give_up_callbacks(struct afs_server *server,
-			     const struct afs_wait_mode *wait_mode)
+void afs_fs_create_file(struct afs_operation *op)
 {
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
 	struct afs_call *call;
-	size_t ncallbacks;
-	__be32 *bp, *tp;
-	int loop;
-
-	ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
-			      ARRAY_SIZE(server->cb_break));
-
-	_enter("{%zu},", ncallbacks);
+	size_t namesz, reqsz, padsz;
+	__be32 *bp;
 
-	if (ncallbacks == 0)
-		return 0;
-	if (ncallbacks > AFSCBMAX)
-		ncallbacks = AFSCBMAX;
+	_enter("");
 
-	_debug("break %zu callbacks", ncallbacks);
+	namesz = name->len;
+	padsz = (4 - (namesz & 3)) & 3;
+	reqsz = (5 * 4) + namesz + padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
-				   12 + ncallbacks * 6 * 4, 0);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile,
+				   reqsz, (3 + 21 + 21 + 3 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
-	tp = bp + 2 + ncallbacks * 3;
-	*bp++ = htonl(FSGIVEUPCALLBACKS);
-	*bp++ = htonl(ncallbacks);
-	*tp++ = htonl(ncallbacks);
-
-	atomic_sub(ncallbacks, &server->cb_break_n);
-	for (loop = ncallbacks; loop > 0; loop--) {
-		struct afs_callback *cb =
-			&server->cb_break[server->cb_break_tail];
-
-		*bp++ = htonl(cb->fid.vid);
-		*bp++ = htonl(cb->fid.vnode);
-		*bp++ = htonl(cb->fid.unique);
-		*tp++ = htonl(cb->version);
-		*tp++ = htonl(cb->expiry);
-		*tp++ = htonl(cb->type);
-		smp_mb();
-		server->cb_break_tail =
-			(server->cb_break_tail + 1) &
-			(ARRAY_SIZE(server->cb_break) - 1);
+	*bp++ = htonl(FSCREATEFILE);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name->name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
 	}
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(op->mtime.tv_sec); /* mtime */
+	*bp++ = 0; /* owner */
+	*bp++ = 0; /* group */
+	*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
+	*bp++ = 0; /* segment size */
 
-	ASSERT(ncallbacks > 0);
-	wake_up_nr(&server->cb_break_waitq, ncallbacks);
-
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-}
-
-/*
- * deliver reply data to an FS.CreateFile or an FS.MakeDir
- */
-static int afs_deliver_fs_create_vnode(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
-{
-	struct afs_vnode *vnode = call->reply;
-	const __be32 *bp;
-
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
-
-	/* unmarshall the reply once we've received all of it */
-	bp = call->buffer;
-	xdr_decode_AFSFid(&bp, call->reply2);
-	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSCallBack_raw(&bp, call->reply4);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
-
-	_leave(" = 0 [done]");
-	return 0;
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
-/*
- * FS.CreateFile and FS.MakeDir operation type
- */
-static const struct afs_call_type afs_RXFSCreateXXXX = {
-	.name		= "FS.CreateXXXX",
+static const struct afs_call_type afs_RXFSMakeDir = {
+	.name		= "FS.MakeDir",
+	.op		= afs_FS_MakeDir,
 	.deliver	= afs_deliver_fs_create_vnode,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * create a file or make a directory
+ * Create a new directory
  */
-int afs_fs_create(struct afs_server *server,
-		  struct key *key,
-		  struct afs_vnode *vnode,
-		  const char *name,
-		  umode_t mode,
-		  struct afs_fid *newfid,
-		  struct afs_file_status *newstatus,
-		  struct afs_callback *newcb,
-		  const struct afs_wait_mode *wait_mode)
+void afs_fs_make_dir(struct afs_operation *op)
 {
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
 	struct afs_call *call;
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	namesz = strlen(name);
+	namesz = name->len;
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
-				   (3 + 21 + 21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSMakeDir,
+				   reqsz, (3 + 21 + 21 + 3 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = newfid;
-	call->reply3 = newstatus;
-	call->reply4 = newcb;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(FSMAKEDIR);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
 	*bp++ = htonl(namesz);
-	memcpy(bp, name, namesz);
+	memcpy(bp, name->name, namesz);
 	bp = (void *) bp + namesz;
 	if (padsz > 0) {
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(op->mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
-	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
+	*bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
- * deliver reply data to an FS.RemoveFile or FS.RemoveDir
+ * Deliver reply data to any operation that returns status and volume sync.
  */
-static int afs_deliver_fs_remove(struct afs_call *call,
-				 struct sk_buff *skb, bool last)
+static int afs_deliver_fs_file_status_and_vol(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[0];
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
 }
 
 /*
- * FS.RemoveDir/FS.RemoveFile operation type
+ * FS.RemoveFile operation type
  */
-static const struct afs_call_type afs_RXFSRemoveXXXX = {
-	.name		= "FS.RemoveXXXX",
-	.deliver	= afs_deliver_fs_remove,
-	.abort_to_error	= afs_abort_to_error,
+static const struct afs_call_type afs_RXFSRemoveFile = {
+	.name		= "FS.RemoveFile",
+	.op		= afs_FS_RemoveFile,
+	.deliver	= afs_deliver_fs_file_status_and_vol,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * remove a file or directory
+ * Remove a file.
  */
-int afs_fs_remove(struct afs_server *server,
-		  struct key *key,
-		  struct afs_vnode *vnode,
-		  const char *name,
-		  bool isdir,
-		  const struct afs_wait_mode *wait_mode)
+void afs_fs_remove_file(struct afs_operation *op)
 {
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
 	struct afs_call *call;
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	namesz = strlen(name);
+	namesz = name->len;
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz;
 
-	call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveFile,
+				   reqsz, (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSREMOVEFILE);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name->name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
+	}
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
 
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+static const struct afs_call_type afs_RXFSRemoveDir = {
+	.name		= "FS.RemoveDir",
+	.op		= afs_FS_RemoveDir,
+	.deliver	= afs_deliver_fs_file_status_and_vol,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Remove a directory.
+ */
+void afs_fs_remove_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t namesz, reqsz, padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = name->len;
+	padsz = (4 - (namesz & 3)) & 3;
+	reqsz = (5 * 4) + namesz + padsz;
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveDir,
+				   reqsz, (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(FSREMOVEDIR);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
 	*bp++ = htonl(namesz);
-	memcpy(bp, name, namesz);
+	memcpy(bp, name->name, namesz);
 	bp = (void *) bp + namesz;
 	if (padsz > 0) {
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.Link
  */
-static int afs_deliver_fs_link(struct afs_call *call,
-			       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_link(struct afs_call *call)
 {
-	struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
+	_enter("{%u}", call->unmarshall);
 
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -829,85 +783,78 @@ static int afs_deliver_fs_link(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSLink = {
 	.name		= "FS.Link",
+	.op		= afs_FS_Link,
 	.deliver	= afs_deliver_fs_link,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * make a hard link
  */
-int afs_fs_link(struct afs_server *server,
-		struct key *key,
-		struct afs_vnode *dvnode,
-		struct afs_vnode *vnode,
-		const char *name,
-		const struct afs_wait_mode *wait_mode)
+void afs_fs_link(struct afs_operation *op)
 {
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
 	struct afs_call *call;
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	namesz = strlen(name);
+	namesz = name->len;
 	padsz = (4 - (namesz & 3)) & 3;
 	reqsz = (5 * 4) + namesz + padsz + (3 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = dvnode;
-	call->reply2 = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSLINK);
-	*bp++ = htonl(dvnode->fid.vid);
-	*bp++ = htonl(dvnode->fid.vnode);
-	*bp++ = htonl(dvnode->fid.unique);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
 	*bp++ = htonl(namesz);
-	memcpy(bp, name, namesz);
+	memcpy(bp, name->name, namesz);
 	bp = (void *) bp + namesz;
 	if (padsz > 0) {
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call1(call, &vp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.Symlink
  */
-static int afs_deliver_fs_symlink(struct afs_call *call,
-				  struct sk_buff *skb, bool last)
+static int afs_deliver_fs_symlink(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
+	_enter("{%u}", call->unmarshall);
 
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFid(&bp, call->reply2);
-	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	xdr_decode_AFSFid(&bp, &vp->fid);
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -918,104 +865,91 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSSymlink = {
 	.name		= "FS.Symlink",
+	.op		= afs_FS_Symlink,
 	.deliver	= afs_deliver_fs_symlink,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * create a symbolic link
  */
-int afs_fs_symlink(struct afs_server *server,
-		   struct key *key,
-		   struct afs_vnode *vnode,
-		   const char *name,
-		   const char *contents,
-		   struct afs_fid *newfid,
-		   struct afs_file_status *newstatus,
-		   const struct afs_wait_mode *wait_mode)
+void afs_fs_symlink(struct afs_operation *op)
 {
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
 	struct afs_call *call;
 	size_t namesz, reqsz, padsz, c_namesz, c_padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	namesz = strlen(name);
+	namesz = name->len;
 	padsz = (4 - (namesz & 3)) & 3;
 
-	c_namesz = strlen(contents);
+	c_namesz = strlen(op->create.symlink);
 	c_padsz = (4 - (c_namesz & 3)) & 3;
 
 	reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
 
-	call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSSymlink, reqsz,
 				   (3 + 21 + 21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = newfid;
-	call->reply3 = newstatus;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSYMLINK);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
 	*bp++ = htonl(namesz);
-	memcpy(bp, name, namesz);
+	memcpy(bp, name->name, namesz);
 	bp = (void *) bp + namesz;
 	if (padsz > 0) {
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
 	*bp++ = htonl(c_namesz);
-	memcpy(bp, contents, c_namesz);
+	memcpy(bp, op->create.symlink, c_namesz);
 	bp = (void *) bp + c_namesz;
 	if (c_padsz > 0) {
 		memset(bp, 0, c_padsz);
 		bp = (void *) bp + c_padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(op->mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.Rename
  */
-static int afs_deliver_fs_rename(struct afs_call *call,
-				  struct sk_buff *skb, bool last)
+static int afs_deliver_fs_rename(struct afs_call *call)
 {
-	struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
-	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL);
-	if (new_dvnode != orig_dvnode)
-		xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
-					  NULL);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	xdr_decode_AFSFetchStatus(&bp, call, &orig_dvp->scb);
+	xdr_decode_AFSFetchStatus(&bp, call, &new_dvp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1026,32 +960,30 @@ static int afs_deliver_fs_rename(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSRename = {
 	.name		= "FS.Rename",
+	.op		= afs_FS_Rename,
 	.deliver	= afs_deliver_fs_rename,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * create a symbolic link
+ * Rename/move a file or directory.
  */
-int afs_fs_rename(struct afs_server *server,
-		  struct key *key,
-		  struct afs_vnode *orig_dvnode,
-		  const char *orig_name,
-		  struct afs_vnode *new_dvnode,
-		  const char *new_name,
-		  const struct afs_wait_mode *wait_mode)
+void afs_fs_rename(struct afs_operation *op)
 {
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
 	struct afs_call *call;
 	size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
 	__be32 *bp;
 
 	_enter("");
 
-	o_namesz = strlen(orig_name);
+	o_namesz = orig_name->len;
 	o_padsz = (4 - (o_namesz & 3)) & 3;
 
-	n_namesz = strlen(new_name);
+	n_namesz = new_name->len;
 	n_padsz = (4 - (n_namesz & 3)) & 3;
 
 	reqsz = (4 * 4) +
@@ -1059,74 +991,60 @@ int afs_fs_rename(struct afs_server *server,
 		(3 * 4) +
 		4 + n_namesz + n_padsz;
 
-	call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = orig_dvnode;
-	call->reply2 = new_dvnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSRENAME);
-	*bp++ = htonl(orig_dvnode->fid.vid);
-	*bp++ = htonl(orig_dvnode->fid.vnode);
-	*bp++ = htonl(orig_dvnode->fid.unique);
+	*bp++ = htonl(orig_dvp->fid.vid);
+	*bp++ = htonl(orig_dvp->fid.vnode);
+	*bp++ = htonl(orig_dvp->fid.unique);
 	*bp++ = htonl(o_namesz);
-	memcpy(bp, orig_name, o_namesz);
+	memcpy(bp, orig_name->name, o_namesz);
 	bp = (void *) bp + o_namesz;
 	if (o_padsz > 0) {
 		memset(bp, 0, o_padsz);
 		bp = (void *) bp + o_padsz;
 	}
 
-	*bp++ = htonl(new_dvnode->fid.vid);
-	*bp++ = htonl(new_dvnode->fid.vnode);
-	*bp++ = htonl(new_dvnode->fid.unique);
+	*bp++ = htonl(new_dvp->fid.vid);
+	*bp++ = htonl(new_dvp->fid.vnode);
+	*bp++ = htonl(new_dvp->fid.unique);
 	*bp++ = htonl(n_namesz);
-	memcpy(bp, new_name, n_namesz);
+	memcpy(bp, new_name->name, n_namesz);
 	bp = (void *) bp + n_namesz;
 	if (n_padsz > 0) {
 		memset(bp, 0, n_padsz);
 		bp = (void *) bp + n_padsz;
 	}
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = orig_dvp->fid;
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
- * deliver reply data to an FS.StoreData
+ * Deliver reply data to FS.StoreData or FS.StoreStatus
  */
-static int afs_deliver_fs_store_data(struct afs_call *call,
-				     struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_data(struct afs_call *call)
 {
-	struct afs_vnode *vnode = call->reply;
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[0];
 	const __be32 *bp;
+	int ret;
 
-	_enter(",,%u", last);
-
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
+	_enter("");
 
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
-				  &call->store_version);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
-
-	afs_pages_written_back(vnode, call);
+	xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1137,187 +1055,113 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSStoreData = {
 	.name		= "FS.StoreData",
+	.op		= afs_FS_StoreData,
 	.deliver	= afs_deliver_fs_store_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64 = {
 	.name		= "FS.StoreData64",
+	.op		= afs_FS_StoreData64,
 	.deliver	= afs_deliver_fs_store_data,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * store a set of pages to a very large file
  */
-static int afs_fs_store_data64(struct afs_server *server,
-			       struct afs_writeback *wb,
-			       pgoff_t first, pgoff_t last,
-			       unsigned offset, unsigned to,
-			       loff_t size, loff_t pos, loff_t i_size,
-			       const struct afs_wait_mode *wait_mode)
+static void afs_fs_store_data64(struct afs_operation *op)
 {
-	struct afs_vnode *vnode = wb->vnode;
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData64,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64,
 				   (4 + 6 + 3 * 2) * 4,
 				   (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
+		return afs_op_nomem(op);
 
-	call->wb = wb;
-	call->key = wb->key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->mapping = vnode->vfs_inode.i_mapping;
-	call->first = first;
-	call->last = last;
-	call->first_offset = offset;
-	call->last_to = to;
-	call->send_pages = true;
-	call->store_version = vnode->status.data_version + 1;
+	call->write_iter = op->store.write_iter;
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSTOREDATA64);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(op->mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	*bp++ = htonl(pos >> 32);
-	*bp++ = htonl((u32) pos);
-	*bp++ = htonl(size >> 32);
-	*bp++ = htonl((u32) size);
-	*bp++ = htonl(i_size >> 32);
-	*bp++ = htonl((u32) i_size);
+	*bp++ = htonl(upper_32_bits(op->store.pos));
+	*bp++ = htonl(lower_32_bits(op->store.pos));
+	*bp++ = htonl(upper_32_bits(op->store.size));
+	*bp++ = htonl(lower_32_bits(op->store.size));
+	*bp++ = htonl(upper_32_bits(op->store.i_size));
+	*bp++ = htonl(lower_32_bits(op->store.i_size));
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
- * store a set of pages
+ * Write data to a file on the server.
  */
-int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
-		      pgoff_t first, pgoff_t last,
-		      unsigned offset, unsigned to,
-		      const struct afs_wait_mode *wait_mode)
+void afs_fs_store_data(struct afs_operation *op)
 {
-	struct afs_vnode *vnode = wb->vnode;
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
-	loff_t size, pos, i_size;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
-
-	size = to - offset;
-	if (first != last)
-		size += (loff_t)(last - first) << PAGE_SHIFT;
-	pos = (loff_t)first << PAGE_SHIFT;
-	pos += offset;
-
-	i_size = i_size_read(&vnode->vfs_inode);
-	if (pos + size > i_size)
-		i_size = size + pos;
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
 	_debug("size %llx, at %llx, i_size %llx",
-	       (unsigned long long) size, (unsigned long long) pos,
-	       (unsigned long long) i_size);
+	       (unsigned long long)op->store.size,
+	       (unsigned long long)op->store.pos,
+	       (unsigned long long)op->store.i_size);
 
-	if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
-		return afs_fs_store_data64(server, wb, first, last, offset, to,
-					   size, pos, i_size, wait_mode);
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_store_data64(op);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData,
 				   (4 + 6 + 3) * 4,
 				   (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
+		return afs_op_nomem(op);
 
-	call->wb = wb;
-	call->key = wb->key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->mapping = vnode->vfs_inode.i_mapping;
-	call->first = first;
-	call->last = last;
-	call->first_offset = offset;
-	call->last_to = to;
-	call->send_pages = true;
-	call->store_version = vnode->status.data_version + 1;
+	call->write_iter = op->store.write_iter;
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSTOREDATA);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(op->mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
 	*bp++ = 0; /* segment size */
 
-	*bp++ = htonl(pos);
-	*bp++ = htonl(size);
-	*bp++ = htonl(i_size);
+	*bp++ = htonl(lower_32_bits(op->store.pos));
+	*bp++ = htonl(lower_32_bits(op->store.size));
+	*bp++ = htonl(lower_32_bits(op->store.i_size));
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
-}
-
-/*
- * deliver reply data to an FS.StoreStatus
- */
-static int afs_deliver_fs_store_status(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
-{
-	afs_dataversion_t *store_version;
-	struct afs_vnode *vnode = call->reply;
-	const __be32 *bp;
-
-	_enter(",,%u", last);
-
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
-
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
-
-	/* unmarshall the reply once we've received all of it */
-	store_version = NULL;
-	if (call->operation_ID == FSSTOREDATA)
-		store_version = &call->store_version;
-
-	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
-
-	_leave(" = 0 [done]");
-	return 0;
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
@@ -1325,22 +1169,22 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSStoreStatus = {
 	.name		= "FS.StoreStatus",
-	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
+	.op		= afs_FS_StoreStatus,
+	.deliver	= afs_deliver_fs_store_data,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData_as_Status = {
 	.name		= "FS.StoreData",
-	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
+	.op		= afs_FS_StoreData,
+	.deliver	= afs_deliver_fs_store_data,
 	.destructor	= afs_flat_call_destructor,
 };
 
 static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
 	.name		= "FS.StoreData64",
-	.deliver	= afs_deliver_fs_store_status,
-	.abort_to_error	= afs_abort_to_error,
+	.op		= afs_FS_StoreData64,
+	.deliver	= afs_deliver_fs_store_data,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1348,435 +1192,306 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
  * set the attributes on a very large file, using FS.StoreData rather than
  * FS.StoreStatus so as to alter the file size also
  */
-static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
-				 struct afs_vnode *vnode, struct iattr *attr,
-				 const struct afs_wait_mode *wait_mode)
+static void afs_fs_setattr_size64(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
+	struct iattr *attr = op->setattr.attr;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64_as_Status,
 				   (4 + 6 + 3 * 2) * 4,
 				   (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->store_version = vnode->status.data_version + 1;
-	call->operation_ID = FSSTOREDATA;
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSTOREDATA64);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
-	*bp++ = 0;				/* position of start of write */
-	*bp++ = 0;
-	*bp++ = 0;				/* size of write */
+	*bp++ = htonl(upper_32_bits(attr->ia_size));	/* position of start of write */
+	*bp++ = htonl(lower_32_bits(attr->ia_size));
+	*bp++ = 0;					/* size of write */
 	*bp++ = 0;
-	*bp++ = htonl(attr->ia_size >> 32);	/* new file length */
-	*bp++ = htonl((u32) attr->ia_size);
+	*bp++ = htonl(upper_32_bits(attr->ia_size));	/* new file length */
+	*bp++ = htonl(lower_32_bits(attr->ia_size));
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
  * so as to alter the file size also
  */
-static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
-			       struct afs_vnode *vnode, struct iattr *attr,
-			       const struct afs_wait_mode *wait_mode)
+static void afs_fs_setattr_size(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
+	struct iattr *attr = op->setattr.attr;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
-	if (attr->ia_size >> 32)
-		return afs_fs_setattr_size64(server, key, vnode, attr,
-					     wait_mode);
+	if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
+		return afs_fs_setattr_size64(op);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status,
 				   (4 + 6 + 3) * 4,
 				   (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->store_version = vnode->status.data_version + 1;
-	call->operation_ID = FSSTOREDATA;
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSTOREDATA);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
 	xdr_encode_AFS_StoreStatus(&bp, attr);
 
-	*bp++ = 0;				/* position of start of write */
+	*bp++ = htonl(attr->ia_size);		/* position of start of write */
 	*bp++ = 0;				/* size of write */
 	*bp++ = htonl(attr->ia_size);		/* new file length */
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * set the attributes on a file, using FS.StoreData if there's a change in file
  * size, and FS.StoreStatus otherwise
  */
-int afs_fs_setattr(struct afs_server *server, struct key *key,
-		   struct afs_vnode *vnode, struct iattr *attr,
-		   const struct afs_wait_mode *wait_mode)
+void afs_fs_setattr(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
+	struct iattr *attr = op->setattr.attr;
 	__be32 *bp;
 
 	if (attr->ia_valid & ATTR_SIZE)
-		return afs_fs_setattr_size(server, key, vnode, attr,
-					   wait_mode);
+		return afs_fs_setattr_size(op);
 
-	_enter(",%x,{%x:%u},,",
-	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSStoreStatus,
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreStatus,
 				   (4 + 6) * 4,
 				   (21 + 6) * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
-	call->operation_ID = FSSTORESTATUS;
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSTORESTATUS);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
-	xdr_encode_AFS_StoreStatus(&bp, attr);
+	xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.GetVolumeStatus
  */
-static int afs_deliver_fs_get_volume_status(struct afs_call *call,
-					    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
+	struct afs_operation *op = call->op;
 	const __be32 *bp;
 	char *p;
+	u32 size;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->unmarshall++;
+		afs_extract_to_buf(call, 12 * 4);
+		fallthrough;
 
 		/* extract the returned status record */
 	case 1:
 		_debug("extract status");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       12 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
-		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
-		call->offset = 0;
+		xdr_decode_AFSFetchVolumeStatus(&bp, &op->volstatus.vs);
 		call->unmarshall++;
+		afs_extract_to_tmp(call);
+		fallthrough;
 
 		/* extract the volume name length */
 	case 2:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("volname length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
-		call->offset = 0;
+			return afs_protocol_error(call, afs_eproto_volname_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
 		call->unmarshall++;
+		fallthrough;
 
 		/* extract the volume name */
 	case 3:
 		_debug("extract volname");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
-		p = call->reply3;
+		p = call->buffer;
 		p[call->count] = 0;
 		_debug("volname '%s'", p);
-
-		call->offset = 0;
-		call->unmarshall++;
-
-		/* extract the volume name padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_volname_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 4:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
-
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
-	no_volname_padding:
+		fallthrough;
 
 		/* extract the offline message length */
-	case 5:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("offline msg length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
-		call->offset = 0;
+			return afs_protocol_error(call, afs_eproto_offline_msg_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
 		call->unmarshall++;
+		fallthrough;
 
 		/* extract the offline message */
-	case 6:
+	case 5:
 		_debug("extract offline");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
-		p = call->reply3;
+		p = call->buffer;
 		p[call->count] = 0;
 		_debug("offline '%s'", p);
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
-
-		/* extract the offline message padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_offline_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 7:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
-
-		call->offset = 0;
-		call->unmarshall++;
-	no_offline_padding:
+		fallthrough;
 
 		/* extract the message of the day length */
-	case 8:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+	case 6:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("motd length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
-		call->offset = 0;
+			return afs_protocol_error(call, afs_eproto_motd_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
 		call->unmarshall++;
+		fallthrough;
 
 		/* extract the message of the day */
-	case 9:
+	case 7:
 		_debug("extract motd");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
-		}
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
 
-		p = call->reply3;
+		p = call->buffer;
 		p[call->count] = 0;
 		_debug("motd '%s'", p);
 
-		call->offset = 0;
 		call->unmarshall++;
+		fallthrough;
 
-		/* extract the message of the day padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_motd_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 10:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
-
-		call->offset = 0;
-		call->unmarshall++;
-	no_motd_padding:
-
-	case 11:
-		_debug("trailer %d", skb->len);
-		if (skb->len != 0)
-			return -EBADMSG;
+	case 8:
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	_leave(" = 0 [done]");
 	return 0;
 }
 
 /*
- * destroy an FS.GetVolumeStatus call
- */
-static void afs_get_volume_status_call_destructor(struct afs_call *call)
-{
-	kfree(call->reply3);
-	call->reply3 = NULL;
-	afs_flat_call_destructor(call);
-}
-
-/*
  * FS.GetVolumeStatus operation type
  */
 static const struct afs_call_type afs_RXFSGetVolumeStatus = {
 	.name		= "FS.GetVolumeStatus",
+	.op		= afs_FS_GetVolumeStatus,
 	.deliver	= afs_deliver_fs_get_volume_status,
-	.abort_to_error	= afs_abort_to_error,
-	.destructor	= afs_get_volume_status_call_destructor,
+	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * fetch the status of a volume
  */
-int afs_fs_get_volume_status(struct afs_server *server,
-			     struct key *key,
-			     struct afs_vnode *vnode,
-			     struct afs_volume_status *vs,
-			     const struct afs_wait_mode *wait_mode)
+void afs_fs_get_volume_status(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
-	void *tmpbuf;
 
 	_enter("");
 
-	tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
-	if (!tmpbuf)
-		return -ENOMEM;
-
-	call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
-	if (!call) {
-		kfree(tmpbuf);
-		return -ENOMEM;
-	}
-
-	call->key = key;
-	call->reply = vnode;
-	call->reply2 = vs;
-	call->reply3 = tmpbuf;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSGetVolumeStatus, 2 * 4,
+				   max(12 * 4, AFSOPAQUEMAX + 1));
+	if (!call)
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	bp[0] = htonl(FSGETVOLUMESTATUS);
-	bp[1] = htonl(vnode->fid.vid);
+	bp[1] = htonl(vp->fid.vid);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
  */
-static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
-				    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
 {
+	struct afs_operation *op = call->op;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
+	_enter("{%u}", call->unmarshall);
 
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+	xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -1787,8 +1502,9 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
  */
 static const struct afs_call_type afs_RXFSSetLock = {
 	.name		= "FS.SetLock",
+	.op		= afs_FS_SetLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1797,8 +1513,9 @@ static const struct afs_call_type afs_RXFSSetLock = {
  */
 static const struct afs_call_type afs_RXFSExtendLock = {
 	.name		= "FS.ExtendLock",
+	.op		= afs_FS_ExtendLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
+	.done		= afs_lock_op_done,
 	.destructor	= afs_flat_call_destructor,
 };
 
@@ -1807,105 +1524,611 @@ static const struct afs_call_type afs_RXFSExtendLock = {
  */
 static const struct afs_call_type afs_RXFSReleaseLock = {
 	.name		= "FS.ReleaseLock",
+	.op		= afs_FS_ReleaseLock,
 	.deliver	= afs_deliver_fs_xxxx_lock,
-	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * get a lock on a file
+ * Set a lock on a file
  */
-int afs_fs_set_lock(struct afs_server *server,
-		    struct key *key,
-		    struct afs_vnode *vnode,
-		    afs_lock_type_t type,
-		    const struct afs_wait_mode *wait_mode)
+void afs_fs_set_lock(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSSETLOCK);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
-	*bp++ = htonl(type);
-
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
+	*bp++ = htonl(op->lock.type);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * extend a lock on a file
  */
-int afs_fs_extend_lock(struct afs_server *server,
-		       struct key *key,
-		       struct afs_vnode *vnode,
-		       const struct afs_wait_mode *wait_mode)
+void afs_fs_extend_lock(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
 	if (!call)
-		return -ENOMEM;
-
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+		return afs_op_nomem(op);
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(FSEXTENDLOCK);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
 }
 
 /*
  * release a lock on a file
  */
-int afs_fs_release_lock(struct afs_server *server,
-			struct key *key,
-			struct afs_vnode *vnode,
-			const struct afs_wait_mode *wait_mode)
+void afs_fs_release_lock(struct afs_operation *op)
 {
+	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_call *call;
 	__be32 *bp;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSRELEASELOCK);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an FS.GiveUpAllCallBacks operation.
+ */
+static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
+{
+	return afs_transfer_reply(call);
+}
+
+/*
+ * FS.GiveUpAllCallBacks operation type
+ */
+static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
+	.name		= "FS.GiveUpAllCallBacks",
+	.op		= afs_FS_GiveUpAllCallBacks,
+	.deliver	= afs_deliver_fs_give_up_all_callbacks,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Flush all the callbacks we have on a server.
+ */
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+				 struct afs_address *addr, struct key *key)
+{
+	struct afs_call *call;
+	__be32 *bp;
+	int ret;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0);
 	if (!call)
 		return -ENOMEM;
 
-	call->key = key;
-	call->reply = vnode;
-	call->service_id = FS_SERVICE;
-	call->port = htons(AFS_FS_PORT);
+	call->key	= key;
+	call->peer	= rxrpc_kernel_get_peer(addr->peer);
+	call->service_id = server->service_id;
 
 	/* marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(FSRELEASELOCK);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(FSGIVEUPALLCALLBACKS);
+
+	call->server = afs_use_server(server, false, afs_server_trace_use_give_up_cb);
+	afs_make_call(call, GFP_NOFS);
+	afs_wait_for_call_to_complete(call);
+	ret = call->error;
+	if (call->responded)
+		set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
+	afs_put_call(call);
+	return ret;
+}
+
+/*
+ * Deliver reply data to an FS.GetCapabilities operation.
+ */
+static int afs_deliver_fs_get_capabilities(struct afs_call *call)
+{
+	u32 count;
+	int ret;
+
+	_enter("{%u,%zu}", call->unmarshall, iov_iter_count(call->iter));
 
-	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the capabilities word count */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		count = ntohl(call->tmp);
+		call->count = count;
+		call->count2 = count;
+		if (count == 0) {
+			call->unmarshall = 4;
+			call->tmp = 0;
+			break;
+		}
+
+		/* Extract the first word of the capabilities to call->tmp */
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+	case 2:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_discard(call, (count - 1) * sizeof(__be32));
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract remaining capabilities words */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		call->unmarshall++;
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void afs_fs_get_capabilities_destructor(struct afs_call *call)
+{
+	afs_put_endpoint_state(call->probe, afs_estate_trace_put_getcaps);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * FS.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXFSGetCapabilities = {
+	.name		= "FS.GetCapabilities",
+	.op		= afs_FS_GetCapabilities,
+	.deliver	= afs_deliver_fs_get_capabilities,
+	.done		= afs_fileserver_probe_result,
+	.immediate_cancel = afs_fileserver_probe_result,
+	.destructor	= afs_fs_get_capabilities_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports.  This RPC can
+ * reply with up to 196 words.  The operation is asynchronous and if we managed
+ * to allocate a call, true is returned the result is delivered through the
+ * ->done() - otherwise we return false to indicate we didn't even try.
+ */
+bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
+			     struct afs_endpoint_state *estate, unsigned int addr_index,
+			     struct key *key)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
+	if (!call)
+		return false;
+
+	call->key	= key;
+	call->server	= afs_use_server(server, false, afs_server_trace_use_get_caps);
+	call->peer	= rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer);
+	call->probe	= afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps);
+	call->probe_index = addr_index;
+	call->service_id = server->service_id;
+	call->upgrade	= true;
+	call->async	= true;
+	call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSGETCAPABILITIES);
+
+	trace_afs_make_fs_call(call, NULL);
+	afs_make_call(call, GFP_NOFS);
+	afs_put_call(call);
+	return true;
+}
+
+/*
+ * Deliver reply data to an FS.InlineBulkStatus call
+ */
+static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb;
+	const __be32 *bp;
+	u32 tmp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the file status count and array in two steps */
+	case 1:
+		_debug("extract status count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("status count: %u/%u", tmp, op->nr_files);
+		if (tmp != op->nr_files)
+			return afs_protocol_error(call, afs_eproto_ibulkst_count);
+
+		call->count = 0;
+		call->unmarshall++;
+	more_counts:
+		afs_extract_to_buf(call, 21 * sizeof(__be32));
+		fallthrough;
+
+	case 2:
+		_debug("extract status array %u", call->count);
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		switch (call->count) {
+		case 0:
+			scb = &op->file[0].scb;
+			break;
+		case 1:
+			scb = &op->file[1].scb;
+			break;
+		default:
+			scb = &op->more_files[call->count - 2].scb;
+			break;
+		}
+
+		bp = call->buffer;
+		xdr_decode_AFSFetchStatus(&bp, call, scb);
+
+		call->count++;
+		if (call->count < op->nr_files)
+			goto more_counts;
+
+		call->count = 0;
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+		fallthrough;
+
+		/* Extract the callback count and array in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != op->nr_files)
+			return afs_protocol_error(call, afs_eproto_ibulkst_cb_count);
+		call->count = 0;
+		call->unmarshall++;
+	more_cbs:
+		afs_extract_to_buf(call, 3 * sizeof(__be32));
+		fallthrough;
+
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall CB array");
+		switch (call->count) {
+		case 0:
+			scb = &op->file[0].scb;
+			break;
+		case 1:
+			scb = &op->file[1].scb;
+			break;
+		default:
+			scb = &op->more_files[call->count - 2].scb;
+			break;
+		}
+
+		bp = call->buffer;
+		xdr_decode_AFSCallBack(&bp, call, scb);
+		call->count++;
+		if (call->count < op->nr_files)
+			goto more_cbs;
+
+		afs_extract_to_buf(call, 6 * sizeof(__be32));
+		call->unmarshall++;
+		fallthrough;
+
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		/* Unfortunately, prior to OpenAFS-1.6, volsync here is filled
+		 * with rubbish.
+		 */
+		xdr_decode_AFSVolSync(&bp, NULL);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void afs_done_fs_inline_bulk_status(struct afs_call *call)
+{
+	if (call->error == -ECONNABORTED &&
+	    call->abort_code == RX_INVALID_OPERATION) {
+		set_bit(AFS_SERVER_FL_NO_IBULK, &call->server->flags);
+		if (call->op)
+			set_bit(AFS_VOLUME_MAYBE_NO_IBULK, &call->op->volume->flags);
+	}
+}
+
+/*
+ * FS.InlineBulkStatus operation type
+ */
+static const struct afs_call_type afs_RXFSInlineBulkStatus = {
+	.name		= "FS.InlineBulkStatus",
+	.op		= afs_FS_InlineBulkStatus,
+	.deliver	= afs_deliver_fs_inline_bulk_status,
+	.done		= afs_done_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 50 files
+ */
+void afs_fs_inline_bulk_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_call *call;
+	__be32 *bp;
+	int i;
+
+	if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) {
+		afs_op_set_error(op, -ENOTSUPP);
+		return;
+	}
+
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files);
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSInlineBulkStatus,
+				   (2 + op->nr_files * 3) * 4,
+				   21 * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSINLINEBULKSTATUS);
+	*bp++ = htonl(op->nr_files);
+	*bp++ = htonl(dvp->fid.vid);
+	*bp++ = htonl(dvp->fid.vnode);
+	*bp++ = htonl(dvp->fid.unique);
+	*bp++ = htonl(vp->fid.vid);
+	*bp++ = htonl(vp->fid.vnode);
+	*bp++ = htonl(vp->fid.unique);
+	for (i = 0; i < op->nr_files - 2; i++) {
+		*bp++ = htonl(op->more_files[i].fid.vid);
+		*bp++ = htonl(op->more_files[i].fid.vnode);
+		*bp++ = htonl(op->more_files[i].fid.unique);
+	}
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * deliver reply data to an FS.FetchACL
+ */
+static int afs_deliver_fs_fetch_acl(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_acl *acl;
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the returned data length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+		if (!acl)
+			return -ENOMEM;
+		op->acl = acl;
+		acl->size = call->count2;
+		afs_extract_begin(call, acl->data, size);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the returned data */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_buf(call, (21 + 6) * 4);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the metadata */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_AFSFetchStatus(&bp, call, &vp->scb);
+		xdr_decode_AFSVolSync(&bp, &op->volsync);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 4:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.FetchACL operation type
+ */
+static const struct afs_call_type afs_RXFSFetchACL = {
+	.name		= "FS.FetchACL",
+	.op		= afs_FS_FetchACL,
+	.deliver	= afs_deliver_fs_fetch_acl,
+};
+
+/*
+ * Fetch the ACL for a file.
+ */
+void afs_fs_fetch_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSFetchACL, 16, (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSFETCHACL);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}
+
+/*
+ * FS.StoreACL operation type
+ */
+static const struct afs_call_type afs_RXFSStoreACL = {
+	.name		= "FS.StoreACL",
+	.op		= afs_FS_StoreACL,
+	.deliver	= afs_deliver_fs_file_status_and_vol,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the ACL for a file.
+ */
+void afs_fs_store_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	const struct afs_acl *acl = op->acl;
+	size_t size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	size = round_up(acl->size, 4);
+	call = afs_alloc_flat_call(op->net, &afs_RXFSStoreACL,
+				   5 * 4 + size, (21 + 6) * 4);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSSTOREACL);
+	bp[1] = htonl(vp->fid.vid);
+	bp[2] = htonl(vp->fid.vnode);
+	bp[3] = htonl(vp->fid.unique);
+	bp[4] = htonl(acl->size);
+	memcpy(&bp[5], acl->data, acl->size);
+	if (acl->size != size)
+		memset((void *)&bp[5] + acl->size, 0, size - acl->size);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
 }
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 789bc253b5f6..dde1857fcabb 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -21,97 +21,480 @@
 #include <linux/sched.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/iversion.h>
 #include "internal.h"
+#include "afs_fs.h"
 
-struct afs_iget_data {
-	struct afs_fid		fid;
-	struct afs_volume	*volume;	/* volume on which resides */
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+	size_t size = strlen(op->create.symlink) + 1;
+	size_t dsize = 0;
+	char *p;
+
+	if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
+				      mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+		return;
+
+	vnode->directory_size = dsize;
+	p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+	memcpy(p, op->create.symlink, size);
+	kunmap_local(p);
+	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+static void afs_put_link(void *arg)
+{
+	struct folio *folio = virt_to_folio(arg);
+
+	kunmap_local(arg);
+	folio_put(folio);
+}
+
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+			 struct delayed_call *callback)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct folio *folio;
+	char *content;
+	ssize_t ret;
+
+	if (!dentry) {
+		/* RCU pathwalk. */
+		if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
+			return ERR_PTR(-ECHILD);
+		goto good;
+	}
+
+	if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+		goto fetch;
+
+	ret = afs_validate(vnode, NULL);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+	    test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+		goto good;
+
+fetch:
+	ret = afs_read_single(vnode, NULL);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+
+good:
+	folio = folioq_folio(vnode->directory, 0);
+	folio_get(folio);
+	content = kmap_local_folio(folio, 0);
+	set_delayed_call(callback, afs_put_link, content);
+	return content;
+}
+
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	DEFINE_DELAYED_CALL(done);
+	const char *content;
+	int len;
+
+	content = afs_get_link(dentry, d_inode(dentry), &done);
+	if (IS_ERR(content)) {
+		do_delayed_call(&done);
+		return PTR_ERR(content);
+	}
+
+	len = umin(strlen(content), buflen);
+	if (copy_to_user(buffer, content, len))
+		len = -EFAULT;
+	do_delayed_call(&done);
+	return len;
+}
+
+static const struct inode_operations afs_symlink_inode_operations = {
+	.get_link	= afs_get_link,
+	.readlink	= afs_readlink,
 };
 
+static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
+{
+	static unsigned long once_only;
+
+	pr_warn("kAFS: AFS vnode with undefined type %u\n", vnode->status.type);
+	pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n",
+		vnode->status.abort_code,
+		vnode->status.mode,
+		vnode->status.size,
+		vnode->status.data_version);
+	pr_warn("kAFS: vnode %llx:%llx:%x\n",
+		vnode->fid.vid,
+		vnode->fid.vnode,
+		vnode->fid.unique);
+	if (parent_vnode)
+		pr_warn("kAFS: dir %llx:%llx:%x\n",
+			parent_vnode->fid.vid,
+			parent_vnode->fid.vnode,
+			parent_vnode->fid.unique);
+
+	if (!test_and_set_bit(0, &once_only))
+		dump_stack();
+}
+
+/*
+ * Set parameters for the netfs library
+ */
+static void afs_set_netfs_context(struct afs_vnode *vnode)
+{
+	netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
+}
+
 /*
- * map the AFS file status to the inode member variables
+ * Initialise an inode from the vnode status.
  */
-static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
+static int afs_inode_init_from_status(struct afs_operation *op,
+				      struct afs_vnode_param *vp,
+				      struct afs_vnode *vnode)
 {
+	struct afs_file_status *status = &vp->scb.status;
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
+	struct timespec64 t;
+
+	_enter("{%llx:%llu.%u} %s",
+	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+	       op->type ? op->type->name : "???");
 
 	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
-	       vnode->status.type,
-	       vnode->status.nlink,
-	       (unsigned long long) vnode->status.size,
-	       vnode->status.data_version,
-	       vnode->status.mode);
+	       status->type,
+	       status->nlink,
+	       (unsigned long long) status->size,
+	       status->data_version,
+	       status->mode);
+
+	write_seqlock(&vnode->cb_lock);
+
+	vnode->cb_v_check = op->cb_v_break;
+	vnode->status = *status;
+
+	t = status->mtime_client;
+	inode_set_ctime_to_ts(inode, t);
+	inode_set_mtime_to_ts(inode, t);
+	inode_set_atime_to_ts(inode, t);
+	inode->i_flags |= S_NOATIME;
+	inode->i_uid = make_kuid(&init_user_ns, status->owner);
+	inode->i_gid = make_kgid(&init_user_ns, status->group);
+	set_nlink(&vnode->netfs.inode, status->nlink);
 
-	switch (vnode->status.type) {
+	switch (status->type) {
 	case AFS_FTYPE_FILE:
-		inode->i_mode	= S_IFREG | vnode->status.mode;
+		inode->i_mode	= S_IFREG | (status->mode & S_IALLUGO);
 		inode->i_op	= &afs_file_inode_operations;
 		inode->i_fop	= &afs_file_operations;
+		inode->i_mapping->a_ops	= &afs_file_aops;
+		mapping_set_large_folios(inode->i_mapping);
 		break;
 	case AFS_FTYPE_DIR:
-		inode->i_mode	= S_IFDIR | vnode->status.mode;
+		inode->i_mode	= S_IFDIR |  (status->mode & S_IALLUGO);
 		inode->i_op	= &afs_dir_inode_operations;
 		inode->i_fop	= &afs_dir_file_operations;
+		inode->i_mapping->a_ops	= &afs_dir_aops;
+		__set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
+		/* Assume locally cached directory data will be valid. */
+		__set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
 		break;
 	case AFS_FTYPE_SYMLINK:
-		inode->i_mode	= S_IFLNK | vnode->status.mode;
-		inode->i_op	= &page_symlink_inode_operations;
+		/* Symlinks with a mode of 0644 are actually mountpoints. */
+		if ((status->mode & 0777) == 0644) {
+			inode->i_flags |= S_AUTOMOUNT;
+
+			set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+
+			inode->i_mode	= S_IFDIR | 0555;
+			inode->i_op	= &afs_mntpt_inode_operations;
+			inode->i_fop	= &afs_mntpt_file_operations;
+		} else {
+			inode->i_mode	= S_IFLNK | status->mode;
+			inode->i_op	= &afs_symlink_inode_operations;
+		}
+		inode->i_mapping->a_ops	= &afs_dir_aops;
+		inode_nohighmem(inode);
+		mapping_set_release_always(inode->i_mapping);
 		break;
 	default:
-		printk("kAFS: AFS vnode with undefined type\n");
-		return -EBADMSG;
+		dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
+		write_sequnlock(&vnode->cb_lock);
+		return afs_protocol_error(NULL, afs_eproto_file_type);
 	}
 
-#ifdef CONFIG_AFS_FSCACHE
-	if (vnode->status.size != inode->i_size)
-		fscache_attr_changed(vnode->cache);
-#endif
+	afs_set_i_size(vnode, status->size);
+	afs_set_netfs_context(vnode);
 
-	set_nlink(inode, vnode->status.nlink);
-	inode->i_uid		= vnode->status.owner;
-	inode->i_gid		= GLOBAL_ROOT_GID;
-	inode->i_size		= vnode->status.size;
-	inode->i_ctime.tv_sec	= vnode->status.mtime_server;
-	inode->i_ctime.tv_nsec	= 0;
-	inode->i_atime		= inode->i_mtime = inode->i_ctime;
-	inode->i_blocks		= 0;
-	inode->i_generation	= vnode->fid.unique;
-	inode->i_version	= vnode->status.data_version;
-	inode->i_mapping->a_ops	= &afs_fs_aops;
+	vnode->invalid_before	= status->data_version;
+	trace_afs_set_dv(vnode, status->data_version);
+	inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
 
-	/* check to see whether a symbolic link is really a mountpoint */
-	if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-		afs_mntpt_check_symlink(vnode, key);
+	if (!vp->scb.have_cb) {
+		/* it's a symlink we just created (the fileserver
+		 * didn't give us a callback) */
+		afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
+	} else {
+		vnode->cb_server = op->server;
+		afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
+				   afs_cb_promise_set_new_inode);
+	}
 
-		if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
-			inode->i_mode	= S_IFDIR | vnode->status.mode;
-			inode->i_op	= &afs_mntpt_inode_operations;
-			inode->i_fop	= &afs_mntpt_file_operations;
+	write_sequnlock(&vnode->cb_lock);
+	return 0;
+}
+
+/*
+ * Update the core inode struct from a returned status record.
+ */
+static void afs_apply_status(struct afs_operation *op,
+			     struct afs_vnode_param *vp)
+{
+	struct afs_file_status *status = &vp->scb.status;
+	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
+	struct timespec64 t;
+	umode_t mode;
+	bool unexpected_jump = false;
+	bool data_changed = false;
+	bool change_size = vp->set_size;
+
+	_enter("{%llx:%llu.%u} %s",
+	       vp->fid.vid, vp->fid.vnode, vp->fid.unique,
+	       op->type ? op->type->name : "???");
+
+	BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
+
+	if (status->type != vnode->status.type) {
+		pr_warn("Vnode %llx:%llx:%x changed type %u to %u\n",
+			vnode->fid.vid,
+			vnode->fid.vnode,
+			vnode->fid.unique,
+			status->type, vnode->status.type);
+		afs_protocol_error(NULL, afs_eproto_bad_status);
+		return;
+	}
+
+	if (status->nlink != vnode->status.nlink)
+		set_nlink(inode, status->nlink);
+
+	if (status->owner != vnode->status.owner)
+		inode->i_uid = make_kuid(&init_user_ns, status->owner);
+
+	if (status->group != vnode->status.group)
+		inode->i_gid = make_kgid(&init_user_ns, status->group);
+
+	if (status->mode != vnode->status.mode) {
+		mode = inode->i_mode;
+		mode &= ~S_IALLUGO;
+		mode |= status->mode & S_IALLUGO;
+		WRITE_ONCE(inode->i_mode, mode);
+	}
+
+	t = status->mtime_client;
+	inode_set_mtime_to_ts(inode, t);
+	if (vp->update_ctime)
+		inode_set_ctime_to_ts(inode, op->ctime);
+
+	if (vnode->status.data_version != status->data_version) {
+		trace_afs_set_dv(vnode, status->data_version);
+		data_changed = true;
+	}
+
+	vnode->status = *status;
+
+	if (vp->dv_before + vp->dv_delta != status->data_version) {
+		trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
+				      status->data_version);
+
+		if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
+		    atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
+			pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
+				vnode->fid.vid, vnode->fid.vnode,
+				(unsigned long long)vp->dv_before + vp->dv_delta,
+				(unsigned long long)status->data_version,
+				op->type ? op->type->name : "???",
+				op->debug_id);
+
+		vnode->invalid_before = status->data_version;
+		if (vnode->status.type == AFS_FTYPE_DIR)
+			afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
+		else
+			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+		change_size = true;
+		data_changed = true;
+		unexpected_jump = true;
+	} else if (vnode->status.type == AFS_FTYPE_DIR) {
+		/* Expected directory change is handled elsewhere so
+		 * that we can locally edit the directory and save on a
+		 * download.
+		 */
+		if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			data_changed = false;
+		change_size = true;
+	}
+
+	if (data_changed) {
+		inode_set_iversion_raw(inode, status->data_version);
+
+		/* Only update the size if the data version jumped.  If the
+		 * file is being modified locally, then we might have our own
+		 * idea of what the size should be that's not the same as
+		 * what's on the server.
+		 */
+		vnode->netfs.remote_i_size = status->size;
+		if (change_size || status->size > i_size_read(inode)) {
+			afs_set_i_size(vnode, status->size);
+			if (unexpected_jump)
+				vnode->netfs.zero_point = status->size;
+			inode_set_ctime_to_ts(inode, t);
+			inode_set_atime_to_ts(inode, t);
 		}
+		if (op->ops == &afs_fetch_data_operation)
+			op->fetch.subreq->rreq->i_size = status->size;
 	}
+}
 
-	return 0;
+/*
+ * Apply a callback to a vnode.
+ */
+static void afs_apply_callback(struct afs_operation *op,
+			       struct afs_vnode_param *vp)
+{
+	struct afs_callback *cb = &vp->scb.callback;
+	struct afs_vnode *vnode = vp->vnode;
+
+	if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
+		if (op->volume->type == AFSVL_RWVOL)
+			vnode->cb_server = op->server;
+		afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb);
+	}
 }
 
 /*
- * iget5() comparator
+ * Apply the received status and callback to an inode all in the same critical
+ * section to avoid races with afs_validate().
  */
-static int afs_iget5_test(struct inode *inode, void *opaque)
+void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp)
 {
-	struct afs_iget_data *data = opaque;
+	struct afs_vnode *vnode = vp->vnode;
+
+	_enter("");
 
-	return inode->i_ino == data->fid.vnode &&
-		inode->i_generation == data->fid.unique;
+	write_seqlock(&vnode->cb_lock);
+
+	if (vp->scb.have_error) {
+		/* A YFS server will return this from RemoveFile2 and AFS and
+		 * YFS will return this from InlineBulkStatus.
+		 */
+		if (vp->scb.status.abort_code == VNOVNODE) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			clear_nlink(&vnode->netfs.inode);
+			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+			op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
+		}
+	} else if (vp->scb.have_status) {
+		if (vp->speculative &&
+		    (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) ||
+		     vp->dv_before != vnode->status.data_version))
+			/* Ignore the result of a speculative bulk status fetch
+			 * if it splits around a modification op, thereby
+			 * appearing to regress the data version.
+			 */
+			goto out;
+		afs_apply_status(op, vp);
+		if (vp->scb.have_cb)
+			afs_apply_callback(op, vp);
+	} else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+		drop_nlink(&vnode->netfs.inode);
+		if (vnode->netfs.inode.i_nlink == 0) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			__afs_break_callback(vnode, afs_cb_break_for_deleted);
+		}
+	}
+
+out:
+	write_sequnlock(&vnode->cb_lock);
+
+	if (vp->scb.have_status)
+		afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
 }
 
+static void afs_fetch_status_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	struct afs_vnode *vnode = vp->vnode;
+	int ret;
+
+	if (inode_state_read_once(&vnode->netfs.inode) & I_NEW) {
+		ret = afs_inode_init_from_status(op, vp, vnode);
+		afs_op_set_error(op, ret);
+		if (ret == 0)
+			afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
+	} else {
+		afs_vnode_commit_status(op, vp);
+	}
+}
+
+const struct afs_operation_ops afs_fetch_status_operation = {
+	.issue_afs_rpc	= afs_fs_fetch_status,
+	.issue_yfs_rpc	= yfs_fs_fetch_status,
+	.success	= afs_fetch_status_success,
+	.aborted	= afs_check_for_remote_deletion,
+};
+
 /*
- * iget5() comparator for inode created by autocell operations
- *
- * These pseudo inodes don't match anything.
+ * Fetch file status from the volume.
  */
-static int afs_iget5_autocell_test(struct inode *inode, void *opaque)
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
+		     afs_access_t *_caller_access)
 {
-	return 0;
+	struct afs_operation *op;
+
+	_enter("%s,{%llx:%llu.%u,S=%lx}",
+	       vnode->volume->name,
+	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+	       vnode->flags);
+
+	op = afs_alloc_operation(key, vnode->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+
+	if (_caller_access)
+		*_caller_access = op->file[0].scb.status.caller_access;
+	return afs_put_operation(op);
+}
+
+/*
+ * ilookup() comparator
+ */
+int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_fid *fid = opaque;
+
+	return (fid->vnode == vnode->fid.vnode &&
+		fid->vnode_hi == vnode->fid.vnode_hi &&
+		fid->unique == vnode->fid.unique);
+}
+
+/*
+ * iget5() comparator
+ */
+static int afs_iget5_test(struct inode *inode, void *opaque)
+{
+	struct afs_vnode_param *vp = opaque;
+	//struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	return afs_ilookup5_test_by_fid(inode, &vp->fid);
 }
 
 /*
@@ -119,271 +502,216 @@ static int afs_iget5_autocell_test(struct inode *inode, void *opaque)
  */
 static int afs_iget5_set(struct inode *inode, void *opaque)
 {
-	struct afs_iget_data *data = opaque;
+	struct afs_vnode_param *vp = opaque;
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	inode->i_ino = data->fid.vnode;
-	inode->i_generation = data->fid.unique;
-	vnode->fid = data->fid;
-	vnode->volume = data->volume;
+	vnode->volume		= as->volume;
+	vnode->fid		= vp->fid;
 
+	/* YFS supports 96-bit vnode IDs, but Linux only supports
+	 * 64-bit inode numbers.
+	 */
+	inode->i_ino		= vnode->fid.vnode;
+	inode->i_generation	= vnode->fid.unique;
 	return 0;
 }
 
 /*
- * inode retrieval for autocell
+ * Get a cache cookie for an inode.
  */
-struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
-				int namesz, struct key *key)
+static void afs_get_inode_cache(struct afs_vnode *vnode)
 {
-	struct afs_iget_data data;
-	struct afs_super_info *as;
-	struct afs_vnode *vnode;
-	struct super_block *sb;
-	struct inode *inode;
-	static atomic_t afs_autocell_ino;
-
-	_enter("{%x:%u},%*.*s,",
-	       AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode,
-	       namesz, namesz, dev_name ?: "");
-
-	sb = dir->i_sb;
-	as = sb->s_fs_info;
-	data.volume = as->volume;
-	data.fid.vid = as->volume->vid;
-	data.fid.unique = 0;
-	data.fid.vnode = 0;
-
-	inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino),
-			     afs_iget5_autocell_test, afs_iget5_set,
-			     &data);
-	if (!inode) {
-		_leave(" = -ENOMEM");
-		return ERR_PTR(-ENOMEM);
+#ifdef CONFIG_AFS_FSCACHE
+	struct {
+		__be32 vnode_id;
+		__be32 unique;
+		__be32 vnode_id_ext[2];	/* Allow for a 96-bit key */
+	} __packed key;
+	struct afs_vnode_cache_aux aux;
+
+	if (vnode->status.type != AFS_FTYPE_FILE &&
+	    vnode->status.type != AFS_FTYPE_DIR &&
+	    vnode->status.type != AFS_FTYPE_SYMLINK) {
+		vnode->netfs.cache = NULL;
+		return;
 	}
 
-	_debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
-	       inode, inode->i_ino, data.fid.vid, data.fid.vnode,
-	       data.fid.unique);
-
-	vnode = AFS_FS_I(inode);
-
-	/* there shouldn't be an existing inode */
-	BUG_ON(!(inode->i_state & I_NEW));
-
-	inode->i_size		= 0;
-	inode->i_mode		= S_IFDIR | S_IRUGO | S_IXUGO;
-	inode->i_op		= &afs_autocell_inode_operations;
-	set_nlink(inode, 2);
-	inode->i_uid		= GLOBAL_ROOT_UID;
-	inode->i_gid		= GLOBAL_ROOT_GID;
-	inode->i_ctime.tv_sec	= get_seconds();
-	inode->i_ctime.tv_nsec	= 0;
-	inode->i_atime		= inode->i_mtime = inode->i_ctime;
-	inode->i_blocks		= 0;
-	inode->i_version	= 0;
-	inode->i_generation	= 0;
-
-	set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
-	set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-	inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
-	unlock_new_inode(inode);
-	_leave(" = %p", inode);
-	return inode;
+	key.vnode_id		= htonl(vnode->fid.vnode);
+	key.unique		= htonl(vnode->fid.unique);
+	key.vnode_id_ext[0]	= htonl(vnode->fid.vnode >> 32);
+	key.vnode_id_ext[1]	= htonl(vnode->fid.vnode_hi);
+	afs_set_cache_aux(vnode, &aux);
+
+	afs_vnode_set_cache(vnode,
+			    fscache_acquire_cookie(
+				    vnode->volume->cache,
+				    vnode->status.type == AFS_FTYPE_FILE ?
+				    0 : FSCACHE_ADV_SINGLE_CHUNK,
+				    &key, sizeof(key),
+				    &aux, sizeof(aux),
+				    i_size_read(&vnode->netfs.inode)));
+#endif
 }
 
 /*
  * inode retrieval
  */
-struct inode *afs_iget(struct super_block *sb, struct key *key,
-		       struct afs_fid *fid, struct afs_file_status *status,
-		       struct afs_callback *cb)
+struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
 {
-	struct afs_iget_data data = { .fid = *fid };
-	struct afs_super_info *as;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct super_block *sb = dvp->vnode->netfs.inode.i_sb;
 	struct afs_vnode *vnode;
 	struct inode *inode;
 	int ret;
 
-	_enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
-
-	as = sb->s_fs_info;
-	data.volume = as->volume;
+	_enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique);
 
-	inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set,
-			     &data);
+	inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp);
 	if (!inode) {
 		_leave(" = -ENOMEM");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	_debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
-	       inode, fid->vid, fid->vnode, fid->unique);
-
 	vnode = AFS_FS_I(inode);
 
+	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
+	       inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+
 	/* deal with an existing inode */
-	if (!(inode->i_state & I_NEW)) {
+	if (!(inode_state_read_once(inode) & I_NEW)) {
 		_leave(" = %p", inode);
 		return inode;
 	}
 
-	if (!status) {
-		/* it's a remotely extant inode */
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
-		if (ret < 0)
-			goto bad_inode;
-	} else {
-		/* it's an inode we just created */
-		memcpy(&vnode->status, status, sizeof(vnode->status));
-
-		if (!cb) {
-			/* it's a symlink we just created (the fileserver
-			 * didn't give us a callback) */
-			vnode->cb_version = 0;
-			vnode->cb_expiry = 0;
-			vnode->cb_type = 0;
-			vnode->cb_expires = get_seconds();
-		} else {
-			vnode->cb_version = cb->version;
-			vnode->cb_expiry = cb->expiry;
-			vnode->cb_type = cb->type;
-			vnode->cb_expires = vnode->cb_expiry + get_seconds();
-		}
-	}
-
-	/* set up caching before mapping the status, as map-status reads the
-	 * first page of symlinks to see if they're really mountpoints */
-	inode->i_size = vnode->status.size;
-#ifdef CONFIG_AFS_FSCACHE
-	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
-					      &afs_vnode_cache_index_def,
-					      vnode);
-#endif
-
-	ret = afs_inode_map_status(vnode, key);
+	ret = afs_inode_init_from_status(op, vp, vnode);
 	if (ret < 0)
 		goto bad_inode;
 
+	afs_get_inode_cache(vnode);
+
 	/* success */
 	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
-	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
-	_leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
+	_leave(" = %p", inode);
 	return inode;
 
 	/* failure */
 bad_inode:
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vnode->cache, 0);
-	vnode->cache = NULL;
-#endif
 	iget_failed(inode);
 	_leave(" = %d [bad]", ret);
 	return ERR_PTR(ret);
 }
 
-/*
- * mark the data attached to an inode as obsolete due to a write on the server
- * - might also want to ditch all the outstanding writes and dirty pages
- */
-void afs_zap_data(struct afs_vnode *vnode)
+static int afs_iget5_set_root(struct inode *inode, void *opaque)
 {
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	/* nuke all the non-dirty pages that aren't locked, mapped or being
-	 * written back in a regular file and completely discard the pages in a
-	 * directory or symlink */
-	if (S_ISREG(vnode->vfs_inode.i_mode))
-		invalidate_remote_inode(&vnode->vfs_inode);
-	else
-		invalidate_inode_pages2(vnode->vfs_inode.i_mapping);
+	vnode->volume		= as->volume;
+	vnode->fid.vid		= as->volume->vid;
+	vnode->fid.vnode	= 1;
+	vnode->fid.unique	= 1;
+	inode->i_ino		= 1;
+	inode->i_generation	= 1;
+	return 0;
 }
 
 /*
- * validate a vnode/inode
- * - there are several things we need to check
- *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
- *     symlink)
- *   - parent dir metadata changed (security changes)
- *   - dentry data changed (write, truncate)
- *   - dentry metadata changed (security changes)
+ * Set up the root inode for a volume.  This is always vnode 1, unique 1 within
+ * the volume.
  */
-int afs_validate(struct afs_vnode *vnode, struct key *key)
+struct inode *afs_root_iget(struct super_block *sb, struct key *key)
 {
+	struct afs_super_info *as = AFS_FS_S(sb);
+	struct afs_operation *op;
+	struct afs_vnode *vnode;
+	struct inode *inode;
 	int ret;
 
-	_enter("{v={%x:%u} fl=%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
-	       key_serial(key));
-
-	if (vnode->cb_promised &&
-	    !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
-	    !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-		if (vnode->cb_expires < get_seconds() + 10) {
-			_debug("callback expired");
-			set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		} else {
-			goto valid;
-		}
+	_enter(",{%llx},,", as->volume->vid);
+
+	inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL);
+	if (!inode) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
 	}
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-		goto valid;
+	_debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid);
 
-	mutex_lock(&vnode->validate_lock);
+	BUG_ON(!(inode_state_read_once(inode) & I_NEW));
 
-	/* if the promise has expired, we need to check the server again to get
-	 * a new promise - note that if the (parent) directory's metadata was
-	 * changed then the security may be different and we may no longer have
-	 * access */
-	if (!vnode->cb_promised ||
-	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
-		if (ret < 0)
-			goto error_unlock;
-		_debug("new promise [fl=%lx]", vnode->flags);
-	}
+	vnode = AFS_FS_I(inode);
+	vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
+	afs_set_netfs_context(vnode);
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_debug("file already deleted");
-		ret = -ESTALE;
-		goto error_unlock;
+	op = afs_alloc_operation(key, as->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto error;
 	}
 
-	/* if the vnode's data version number changed then its contents are
-	 * different */
-	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
-		afs_zap_data(vnode);
+	afs_op_set_vnode(op, 0, vnode);
 
-	clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
-	mutex_unlock(&vnode->validate_lock);
-valid:
-	_leave(" = 0");
-	return 0;
+	op->nr_files	= 1;
+	op->ops		= &afs_fetch_status_operation;
+	ret = afs_do_sync_operation(op);
+	if (ret < 0)
+		goto error;
 
-error_unlock:
-	mutex_unlock(&vnode->validate_lock);
-	_leave(" = %d", ret);
-	return ret;
+	afs_get_inode_cache(vnode);
+
+	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
+	unlock_new_inode(inode);
+	_leave(" = %p", inode);
+	return inode;
+
+error:
+	iget_failed(inode);
+	_leave(" = %d [bad]", ret);
+	return ERR_PTR(ret);
 }
 
 /*
  * read the attributes of an inode
  */
-int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		      struct kstat *stat)
+int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
+		struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
-	struct inode *inode;
-
-	inode = dentry->d_inode;
+	struct inode *inode = d_inode(path->dentry);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct key *key;
+	int ret, seq;
 
 	_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
-	generic_fillattr(inode, stat);
+	if (vnode->volume &&
+	    !(query_flags & AT_STATX_DONT_SYNC) &&
+	    atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) {
+		key = afs_request_key(vnode->volume->cell);
+		if (IS_ERR(key))
+			return PTR_ERR(key);
+		ret = afs_validate(vnode, key);
+		key_put(key);
+		if (ret < 0)
+			return ret;
+	}
+
+	do {
+		seq = read_seqbegin(&vnode->cb_lock);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+		if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
+		    stat->nlink > 0)
+			stat->nlink -= 1;
+
+		/* Lie about the size of directories.  We maintain a locally
+		 * edited copy and may make different allocation decisions on
+		 * it, but we need to give userspace the server's size.
+		 */
+		if (S_ISDIR(inode->i_mode))
+			stat->size = vnode->netfs.remote_i_size;
+	} while (read_seqretry(&vnode->cb_lock, seq));
+
 	return 0;
 }
 
@@ -395,9 +723,9 @@ int afs_drop_inode(struct inode *inode)
 	_enter("");
 
 	if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags))
-		return generic_delete_inode(inode);
+		return inode_just_drop(inode);
 	else
-		return generic_drop_inode(inode);
+		return inode_generic_drop(inode);
 }
 
 /*
@@ -405,94 +733,197 @@ int afs_drop_inode(struct inode *inode)
  */
 void afs_evict_inode(struct inode *inode)
 {
-	struct afs_permits *permits;
-	struct afs_vnode *vnode;
-
-	vnode = AFS_FS_I(inode);
+	struct afs_vnode_cache_aux aux;
+	struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	_enter("{%x:%u.%d} v=%u x=%u t=%u }",
+	_enter("{%llx:%llu.%d}",
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       vnode->cb_version,
-	       vnode->cb_expiry,
-	       vnode->cb_type);
+	       vnode->fid.unique);
 
 	_debug("CLEAR INODE %p", inode);
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
-	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	if ((S_ISDIR(inode->i_mode) ||
+	     S_ISLNK(inode->i_mode)) &&
+	    (inode_state_read_once(inode) & I_DIRTY) &&
+	    !sbi->dyn_root) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.for_sync = true,
+			.range_end = LLONG_MAX,
+		};
+
+		afs_single_writepages(inode->i_mapping, &wbc);
+	}
 
-	afs_give_up_callback(vnode);
+	netfs_wait_for_outstanding_io(inode);
+	truncate_inode_pages_final(&inode->i_data);
+	netfs_free_folioq_buffer(vnode->directory);
 
-	if (vnode->server) {
-		spin_lock(&vnode->server->fs_lock);
-		rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
-		spin_unlock(&vnode->server->fs_lock);
-		afs_put_server(vnode->server);
-		vnode->server = NULL;
+	afs_set_cache_aux(vnode, &aux);
+	netfs_clear_inode_writeback(inode, &aux);
+	clear_inode(inode);
+
+	while (!list_empty(&vnode->wb_keys)) {
+		struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+						    struct afs_wb_key, vnode_link);
+		list_del(&wbk->vnode_link);
+		afs_put_wb_key(wbk);
 	}
 
-	ASSERT(list_empty(&vnode->writebacks));
-	ASSERT(!vnode->cb_promised);
+	fscache_relinquish_cookie(afs_vnode_cache(vnode),
+				  test_bit(AFS_VNODE_DELETED, &vnode->flags));
 
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vnode->cache, 0);
-	vnode->cache = NULL;
-#endif
+	afs_prune_wb_keys(vnode);
+	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->silly_key);
+	vnode->silly_key = NULL;
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
+	_leave("");
+}
 
-	mutex_lock(&vnode->permits_lock);
-	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
-	mutex_unlock(&vnode->permits_lock);
-	if (permits)
-		call_rcu(&permits->rcu, afs_zap_permits);
+static void afs_setattr_success(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct inode *inode = &vp->vnode->netfs.inode;
+	loff_t old_i_size = i_size_read(inode);
+
+	op->setattr.old_i_size = old_i_size;
+	afs_vnode_commit_status(op, vp);
+	/* inode->i_size has now been changed. */
+
+	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+		loff_t size = op->setattr.attr->ia_size;
+		if (size > old_i_size)
+			pagecache_isize_extended(inode, old_i_size, size);
+	}
+}
 
-	_leave("");
+static void afs_setattr_edit_file(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
+
+	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+		loff_t size = op->setattr.attr->ia_size;
+		loff_t old = op->setattr.old_i_size;
+
+		/* Note: inode->i_size was updated by afs_apply_status() inside
+		 * the I/O and callback locks.
+		 */
+
+		if (size != old) {
+			truncate_pagecache(inode, size);
+			netfs_resize_file(&vnode->netfs, size, true);
+			fscache_resize_cookie(afs_vnode_cache(vnode), size);
+		}
+	}
 }
 
+static const struct afs_operation_ops afs_setattr_operation = {
+	.issue_afs_rpc	= afs_fs_setattr,
+	.issue_yfs_rpc	= yfs_fs_setattr,
+	.success	= afs_setattr_success,
+	.edit_dir	= afs_setattr_edit_file,
+};
+
 /*
  * set the attributes of an inode
  */
-int afs_setattr(struct dentry *dentry, struct iattr *attr)
+int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+		struct iattr *attr)
 {
-	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
-	struct key *key;
+	const unsigned int supported =
+		ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
+		ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH;
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+	struct inode *inode = &vnode->netfs.inode;
+	loff_t i_size;
 	int ret;
 
-	_enter("{%x:%u},{n=%s},%x",
-	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+	_enter("{%llx:%llu},{n=%pd},%x",
+	       vnode->fid.vid, vnode->fid.vnode, dentry,
 	       attr->ia_valid);
 
-	if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
-				ATTR_MTIME))) {
+	if (!(attr->ia_valid & supported)) {
 		_leave(" = 0 [unsupported]");
 		return 0;
 	}
 
-	/* flush any dirty data outstanding on a regular file */
-	if (S_ISREG(vnode->vfs_inode.i_mode)) {
-		filemap_write_and_wait(vnode->vfs_inode.i_mapping);
-		afs_writeback_all(vnode);
+	i_size = i_size_read(inode);
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (!S_ISREG(inode->i_mode))
+			return -EISDIR;
+
+		ret = inode_newsize_ok(inode, attr->ia_size);
+		if (ret)
+			return ret;
+
+		if (attr->ia_size == i_size)
+			attr->ia_valid &= ~ATTR_SIZE;
 	}
 
-	if (attr->ia_valid & ATTR_FILE) {
-		key = attr->ia_file->private_data;
-	} else {
-		key = afs_request_key(vnode->volume->cell);
-		if (IS_ERR(key)) {
-			ret = PTR_ERR(key);
-			goto error;
+	fscache_use_cookie(afs_vnode_cache(vnode), true);
+
+	/* Prevent any new writebacks from starting whilst we do this. */
+	down_write(&vnode->validate_lock);
+
+	if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) {
+		loff_t size = attr->ia_size;
+
+		/* Wait for any outstanding writes to the server to complete */
+		loff_t from = min(size, i_size);
+		loff_t to = max(size, i_size);
+		ret = filemap_fdatawait_range(inode->i_mapping, from, to);
+		if (ret < 0)
+			goto out_unlock;
+
+		/* Don't talk to the server if we're just shortening in-memory
+		 * writes that haven't gone to the server yet.
+		 */
+		if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
+		    attr->ia_size < i_size &&
+		    attr->ia_size > vnode->netfs.remote_i_size) {
+			truncate_setsize(inode, attr->ia_size);
+			netfs_resize_file(&vnode->netfs, size, false);
+			fscache_resize_cookie(afs_vnode_cache(vnode),
+					      attr->ia_size);
+			ret = 0;
+			goto out_unlock;
 		}
 	}
 
-	ret = afs_vnode_setattr(vnode, key, attr);
-	if (!(attr->ia_valid & ATTR_FILE))
-		key_put(key);
+	op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
+				  afs_file_key(attr->ia_file) : NULL),
+				 vnode->volume);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto out_unlock;
+	}
 
-error:
+	afs_op_set_vnode(op, 0, vnode);
+	op->setattr.attr = attr;
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		op->file[0].dv_delta = 1;
+		op->file[0].set_size = true;
+	}
+	op->ctime = attr->ia_ctime;
+	op->file[0].update_ctime = 1;
+	op->file[0].modification = true;
+
+	op->ops = &afs_setattr_operation;
+	ret = afs_do_sync_operation(op);
+
+out_unlock:
+	up_write(&vnode->validate_lock);
+	fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a306bb6d88d9..009064b8d661 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1,25 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* internal AFS stuff
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/fs.h>
+#include <linux/filelock.h>
 #include <linux/pagemap.h>
-#include <linux/skbuff.h>
 #include <linux/rxrpc.h>
 #include <linux/key.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
 #include <linux/backing-dev.h>
+#include <linux/uuid.h>
+#include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
+#include <crypto/krb5.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -28,148 +33,225 @@
 
 struct pagevec;
 struct afs_call;
+struct afs_vnode;
+struct afs_server_probe;
+
+/*
+ * Partial file-locking emulation mode.  (The problem being that AFS3 only
+ * allows whole-file locks and no upgrading/downgrading).
+ */
+enum afs_flock_mode {
+	afs_flock_mode_unset,
+	afs_flock_mode_local,	/* Local locking only */
+	afs_flock_mode_openafs,	/* Don't get server lock for a partial lock */
+	afs_flock_mode_strict,	/* Always get a server lock for a partial lock */
+	afs_flock_mode_write,	/* Get an exclusive server lock for a partial lock */
+};
 
-typedef enum {
-	AFS_VL_NEW,			/* new, uninitialised record */
-	AFS_VL_CREATING,		/* creating record */
-	AFS_VL_VALID,			/* record is pending */
-	AFS_VL_NO_VOLUME,		/* no such volume available */
-	AFS_VL_UPDATING,		/* update in progress */
-	AFS_VL_VOLUME_DELETED,		/* volume was deleted */
-	AFS_VL_UNCERTAIN,		/* uncertain state (update failed) */
-} __attribute__((packed)) afs_vlocation_state_t;
-
-struct afs_mount_params {
-	bool			rwpath;		/* T if the parent should be considered R/W */
+struct afs_fs_context {
 	bool			force;		/* T to force cell type */
 	bool			autocell;	/* T if set auto mount operation */
+	bool			dyn_root;	/* T if dynamic root */
+	bool			no_cell;	/* T if the source is "none" (for dynroot) */
+	enum afs_flock_mode	flock_mode;	/* Partial file-locking emulation mode */
 	afs_voltype_t		type;		/* type of volume requested */
-	int			volnamesz;	/* size of volume name */
+	unsigned int		volnamesz;	/* size of volume name */
 	const char		*volname;	/* name of volume to mount */
+	struct afs_net		*net;		/* the AFS net namespace stuff */
 	struct afs_cell		*cell;		/* cell in which to find volume */
 	struct afs_volume	*volume;	/* volume record */
 	struct key		*key;		/* key to use for secure mounting */
 };
 
+enum afs_call_state {
+	AFS_CALL_CL_REQUESTING,		/* Client: Request is being sent */
+	AFS_CALL_CL_AWAIT_REPLY,	/* Client: Awaiting reply */
+	AFS_CALL_CL_PROC_REPLY,		/* Client: rxrpc call complete; processing reply */
+	AFS_CALL_SV_AWAIT_OP_ID,	/* Server: Awaiting op ID */
+	AFS_CALL_SV_AWAIT_REQUEST,	/* Server: Awaiting request data */
+	AFS_CALL_SV_REPLYING,		/* Server: Replying */
+	AFS_CALL_SV_AWAIT_ACK,		/* Server: Awaiting final ACK */
+	AFS_CALL_COMPLETE,		/* Completed or failed */
+};
+
 /*
- * definition of how to wait for the completion of an operation
+ * Address preferences.
  */
-struct afs_wait_mode {
-	/* RxRPC received message notification */
-	void (*rx_wakeup)(struct afs_call *call);
+struct afs_addr_preference {
+	union {
+		struct in_addr	ipv4_addr;	/* AF_INET address to compare against */
+		struct in6_addr	ipv6_addr;	/* AF_INET6 address to compare against */
+	};
+	sa_family_t		family;		/* Which address to use */
+	u16			prio;		/* Priority */
+	u8			subnet_mask;	/* How many bits to compare */
+};
 
-	/* synchronous call waiter and call dispatched notification */
-	int (*wait)(struct afs_call *call);
+struct afs_addr_preference_list {
+	struct rcu_head		rcu;
+	u16			version;	/* Incremented when prefs list changes */
+	u8			ipv6_off;	/* Offset of IPv6 addresses */
+	u8			nr;		/* Number of addresses in total */
+	u8			max_prefs;	/* Number of prefs allocated */
+	struct afs_addr_preference prefs[] __counted_by(max_prefs);
+};
 
-	/* asynchronous call completion */
-	void (*async_complete)(void *reply, int error);
+struct afs_address {
+	struct rxrpc_peer	*peer;
+	short			last_error;	/* Last error from this address */
+	u16			prio;		/* Address priority */
 };
 
-extern const struct afs_wait_mode afs_sync_call;
-extern const struct afs_wait_mode afs_async_call;
+/*
+ * List of server addresses.
+ */
+struct afs_addr_list {
+	struct rcu_head		rcu;
+	refcount_t		usage;
+	u32			version;	/* Version */
+	unsigned int		debug_id;
+	unsigned int		addr_pref_version; /* Version of address preference list */
+	unsigned char		max_addrs;
+	unsigned char		nr_addrs;
+	unsigned char		preferred;	/* Preferred address */
+	unsigned char		nr_ipv4;	/* Number of IPv4 addresses */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
+	unsigned long		probe_failed;	/* Mask of addrs that failed locally/ICMP */
+	unsigned long		responded;	/* Mask of addrs that responded */
+	struct afs_address	addrs[] __counted_by(max_addrs);
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
+};
 
 /*
  * a record of an in-progress RxRPC call
  */
 struct afs_call {
 	const struct afs_call_type *type;	/* type of call */
-	const struct afs_wait_mode *wait_mode;	/* completion wait mode */
 	wait_queue_head_t	waitq;		/* processes awaiting completion */
-	struct work_struct	async_work;	/* asynchronous work processor */
+	struct work_struct	async_work;	/* async I/O processor */
 	struct work_struct	work;		/* actual work processor */
-	struct sk_buff_head	rx_queue;	/* received packets */
+	struct work_struct	free_work;	/* Deferred free processor */
 	struct rxrpc_call	*rxcall;	/* RxRPC call handle */
+	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct key		*key;		/* security for this call */
-	struct afs_server	*server;	/* server affected by incoming CM call */
+	struct afs_net		*net;		/* The network namespace */
+	struct afs_server	*server;	/* The fileserver record if fs op (pins ref) */
+	struct afs_vlserver	*vlserver;	/* The vlserver record if vl op */
 	void			*request;	/* request data (first part) */
-	struct address_space	*mapping;	/* page set */
-	struct afs_writeback	*wb;		/* writeback being performed */
+	size_t			iov_len;	/* Size of *iter to be used */
+	struct iov_iter		def_iter;	/* Default buffer/data iterator */
+	struct iov_iter		*write_iter;	/* Iterator defining write to be made */
+	struct iov_iter		*iter;		/* Iterator currently in use */
+	union {	/* Convenience for ->def_iter */
+		struct kvec	kvec[1];
+		struct bio_vec	bvec[1];
+	};
 	void			*buffer;	/* reply receive buffer */
-	void			*reply;		/* reply buffer (first part) */
-	void			*reply2;	/* reply buffer (second part) */
-	void			*reply3;	/* reply buffer (third part) */
-	void			*reply4;	/* reply buffer (fourth part) */
-	pgoff_t			first;		/* first page in mapping to deal with */
-	pgoff_t			last;		/* last page in mapping to deal with */
-	enum {					/* call state */
-		AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
-		AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
-		AFS_CALL_AWAIT_OP_ID,	/* awaiting op ID on incoming call */
-		AFS_CALL_AWAIT_REQUEST,	/* awaiting request data on incoming call */
-		AFS_CALL_REPLYING,	/* replying to incoming call */
-		AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
-		AFS_CALL_COMPLETE,	/* successfully completed */
-		AFS_CALL_BUSY,		/* server was busy */
-		AFS_CALL_ABORTED,	/* call was aborted */
-		AFS_CALL_ERROR,		/* call failed due to error */
-	}			state;
+	union {
+		struct afs_endpoint_state *probe;
+		struct afs_addr_list	*vl_probe;
+		struct afs_addr_list	*ret_alist;
+		struct afs_vldb_entry	*ret_vldb;
+		char			*ret_str;
+	};
+	struct afs_fid		fid;		/* Primary vnode ID (or all zeroes) */
+	unsigned char		probe_index;	/* Address in ->probe_alist */
+	struct afs_operation	*op;
+	unsigned int		server_index;
+	refcount_t		ref;
+	enum afs_call_state	state;
+	spinlock_t		state_lock;
 	int			error;		/* error code */
+	u32			abort_code;	/* Remote abort ID or 0 */
+	unsigned long long	remaining;	/* How much is left to receive */
+	unsigned int		max_lifespan;	/* Maximum lifespan in secs to set if not 0 */
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
-	unsigned		reply_size;	/* current size of reply */
-	unsigned		first_offset;	/* offset into mapping[first] */
-	unsigned		last_to;	/* amount of mapping[last] */
-	unsigned		offset;		/* offset into received data store */
+	unsigned		count2;		/* count used in unmarshalling */
 	unsigned char		unmarshall;	/* unmarshalling phase */
-	bool			incoming;	/* T if incoming call */
-	bool			send_pages;	/* T if data from mapping should be sent */
-	u16			service_id;	/* RxRPC service ID to call */
-	__be16			port;		/* target UDP port */
-	__be32			operation_ID;	/* operation ID for an incoming call */
+	bool			drop_ref;	/* T if need to drop ref for incoming call */
+	bool			need_attention;	/* T if RxRPC poked us */
+	bool			async;		/* T if asynchronous */
+	bool			upgrade;	/* T to request service upgrade */
+	bool			intr;		/* T if interruptible */
+	bool			unmarshalling_error; /* T if an unmarshalling error occurred */
+	bool			responded;	/* Got a response from the call (may be abort) */
+	u8			security_ix;	/* Security class */
+	u16			service_id;	/* Actual service ID (after upgrade) */
+	unsigned int		debug_id;	/* Trace ID */
+	u32			enctype;	/* Security encoding type */
+	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
-	__be32			tmp;		/* place to extract temporary data */
-	afs_dataversion_t	store_version;	/* updated version expected from store */
+	union {					/* place to extract temporary data */
+		struct {
+			__be32	tmp_u;
+			__be32	tmp;
+		} __attribute__((packed));
+		__be64		tmp64;
+	};
+	ktime_t			issue_time;	/* Time of issue of operation */
 };
 
 struct afs_call_type {
 	const char *name;
+	unsigned int op; /* Really enum afs_fs_operation */
 
 	/* deliver request or reply data to an call
 	 * - returning an error will cause the call to be aborted
 	 */
-	int (*deliver)(struct afs_call *call, struct sk_buff *skb,
-		       bool last);
-
-	/* map an abort code to an error number */
-	int (*abort_to_error)(u32 abort_code);
+	int (*deliver)(struct afs_call *call);
 
 	/* clean up a call */
 	void (*destructor)(struct afs_call *call);
+
+	/* Async receive processing function */
+	void (*async_rx)(struct work_struct *work);
+
+	/* Work function */
+	void (*work)(struct work_struct *work);
+
+	/* Call done function (gets called immediately on success or failure) */
+	void (*done)(struct afs_call *call);
+
+	/* Handle a call being immediately cancelled. */
+	void (*immediate_cancel)(struct afs_call *call);
+};
+
+/*
+ * Key available for writeback on a file.
+ */
+struct afs_wb_key {
+	refcount_t		usage;
+	struct key		*key;
+	struct list_head	vnode_link;	/* Link in vnode->wb_keys */
 };
 
 /*
- * record of an outstanding writeback on a vnode
+ * AFS open file information record.  Pointed to by file->private_data.
  */
-struct afs_writeback {
-	struct list_head	link;		/* link in vnode->writebacks */
-	struct work_struct	writer;		/* work item to perform the writeback */
-	struct afs_vnode	*vnode;		/* vnode to which this write applies */
-	struct key		*key;		/* owner of this write */
-	wait_queue_head_t	waitq;		/* completion and ready wait queue */
-	pgoff_t			first;		/* first page in batch */
-	pgoff_t			point;		/* last page in current store op */
-	pgoff_t			last;		/* last page in batch (inclusive) */
-	unsigned		offset_first;	/* offset into first page of start of write */
-	unsigned		to_last;	/* offset into last page of end of write */
-	int			num_conflicts;	/* count of conflicting writes in list */
-	int			usage;
-	bool			conflicts;	/* T if has dependent conflicts */
-	enum {
-		AFS_WBACK_SYNCING,		/* synchronisation being performed */
-		AFS_WBACK_PENDING,		/* write pending */
-		AFS_WBACK_CONFLICTING,		/* conflicting writes posted */
-		AFS_WBACK_WRITING,		/* writing back */
-		AFS_WBACK_COMPLETE		/* the writeback record has been unlinked */
-	} state __attribute__((packed));
+struct afs_file {
+	struct key		*key;		/* The key this file was opened with */
+	struct afs_wb_key	*wb;		/* Writeback key record for this file */
 };
 
+static inline struct key *afs_file_key(struct file *file)
+{
+	struct afs_file *af = file->private_data;
+
+	return af->key;
+}
+
 /*
  * AFS superblock private data
  * - there's one superblock per volume
  */
 struct afs_super_info {
+	struct net		*net_ns;	/* Network namespace */
+	struct afs_cell		*cell;		/* The cell in which the volume resides */
 	struct afs_volume	*volume;	/* volume record */
-	char			rwparent;	/* T if parent is R/W AFS volume */
+	enum afs_flock_mode	flock_mode:8;	/* File locking emulation mode */
+	bool			dyn_root;	/* True if dynamic root */
 };
 
 static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -180,302 +262,816 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
 extern struct file_system_type afs_fs_type;
 
 /*
- * entry in the cached cell catalogue
+ * Set of substitutes for @sys.
  */
-struct afs_cache_cell {
-	char		name[AFS_MAXCELLNAME];	/* cell name (padded with NULs) */
-	struct in_addr	vl_servers[15];		/* cached cell VL servers */
+struct afs_sysnames {
+#define AFS_NR_SYSNAME 16
+	char			*subs[AFS_NR_SYSNAME];
+	refcount_t		usage;
+	unsigned short		nr;
+	char			blank[1];
 };
 
 /*
- * AFS cell record
+ * AFS network namespace record.
+ */
+struct afs_net {
+	struct net		*net;		/* Backpointer to the owning net namespace */
+	struct afs_uuid		uuid;
+	bool			live;		/* F if this namespace is being removed */
+
+	/* AF_RXRPC I/O stuff */
+	struct socket		*socket;
+	struct afs_call		*spare_incoming_call;
+	struct work_struct	charge_preallocation_work;
+	struct work_struct	rx_oob_work;
+	struct mutex		socket_mutex;
+	atomic_t		nr_outstanding_calls;
+	atomic_t		nr_superblocks;
+
+	/* Cell database */
+	struct rb_root		cells;
+	struct idr		cells_dyn_ino;	/* cell->dynroot_ino mapping */
+	struct afs_cell __rcu	*ws_cell;
+	atomic_t		cells_outstanding;
+	struct rw_semaphore	cells_lock;
+	struct mutex		cells_alias_lock;
+
+	struct mutex		proc_cells_lock;
+	struct hlist_head	proc_cells;
+
+	/* Known servers.  Theoretically each fileserver can only be in one
+	 * cell, but in practice, people create aliases and subsets and there's
+	 * no easy way to distinguish them.
+	 */
+	seqlock_t		fs_lock;	/* For fs_probe_*, fs_proc */
+	struct list_head	fs_probe_fast;	/* List of afs_server to probe at 30s intervals */
+	struct list_head	fs_probe_slow;	/* List of afs_server to probe at 5m intervals */
+	struct hlist_head	fs_proc;	/* procfs servers list */
+
+	struct key		*fs_cm_token_key; /* Key for creating CM tokens */
+	struct work_struct	fs_prober;
+	struct timer_list	fs_probe_timer;
+	atomic_t		servers_outstanding;
+
+	/* File locking renewal management */
+	struct mutex		lock_manager_mutex;
+
+	/* Misc */
+	struct super_block	*dynroot_sb;	/* Dynamic root mount superblock */
+	struct proc_dir_entry	*proc_afs;	/* /proc/net/afs directory */
+	struct afs_sysnames	*sysnames;
+	rwlock_t		sysnames_lock;
+	struct afs_addr_preference_list __rcu *address_prefs;
+	u16			address_pref_version;
+
+	/* Statistics counters */
+	atomic_t		n_lookup;	/* Number of lookups done */
+	atomic_t		n_reval;	/* Number of dentries needing revalidation */
+	atomic_t		n_inval;	/* Number of invalidations by the server */
+	atomic_t		n_relpg;	/* Number of invalidations by release_folio */
+	atomic_t		n_read_dir;	/* Number of directory pages read */
+	atomic_t		n_dir_cr;	/* Number of directory entry creation edits */
+	atomic_t		n_dir_rm;	/* Number of directory entry removal edits */
+	atomic_t		n_stores;	/* Number of store ops */
+	atomic_long_t		n_store_bytes;	/* Number of bytes stored */
+	atomic_long_t		n_fetch_bytes;	/* Number of bytes fetched */
+	atomic_t		n_fetches;	/* Number of data fetch ops */
+};
+
+extern const char afs_init_sysname[];
+
+enum afs_cell_state {
+	AFS_CELL_SETTING_UP,
+	AFS_CELL_UNLOOKED,
+	AFS_CELL_ACTIVE,
+	AFS_CELL_REMOVING,
+	AFS_CELL_DEAD,
+};
+
+/*
+ * AFS cell record.
+ *
+ * This is a tricky concept to get right as it is possible to create aliases
+ * simply by pointing AFSDB/SRV records for two names at the same set of VL
+ * servers; it is also possible to do things like setting up two sets of VL
+ * servers, one of which provides a superset of the volumes provided by the
+ * other (for internal/external division, for example).
+ *
+ * Cells only exist in the sense that (a) a cell's name maps to a set of VL
+ * servers and (b) a cell's name is used by the client to select the key to use
+ * for authentication and encryption.  The cell name is not typically used in
+ * the protocol.
+ *
+ * Two cells are determined to be aliases if they have an explicit alias (YFS
+ * only), share any VL servers in common or have at least one volume in common.
+ * "In common" means that the address list of the VL servers or the fileservers
+ * share at least one endpoint.
  */
 struct afs_cell {
-	atomic_t		usage;
-	struct list_head	link;		/* main cell list link */
+	union {
+		struct rcu_head	rcu;
+		struct rb_node	net_node;	/* Node in net->cells */
+	};
+	struct afs_net		*net;
+	struct afs_cell		*alias_of;	/* The cell this is an alias of */
+	struct afs_volume	*root_volume;	/* The root.cell volume if there is one */
 	struct key		*anonymous_key;	/* anonymous user key for this cell */
-	struct list_head	proc_link;	/* /proc cell list link */
-	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
-#ifdef CONFIG_AFS_FSCACHE
-	struct fscache_cookie	*cache;		/* caching cookie */
-#endif
+	struct work_struct	destroyer;	/* Destroyer for cell */
+	struct work_struct	manager;	/* Manager for init/deinit/dns */
+	struct timer_list	management_timer; /* General management timer */
+	struct hlist_node	proc_link;	/* /proc cell list link */
+	time64_t		dns_expiry;	/* Time AFSDB/SRV record expires */
+	time64_t		last_inactive;	/* Time of last drop of usage count */
+	refcount_t		ref;		/* Struct refcount */
+	atomic_t		active;		/* Active usage counter */
+	unsigned long		flags;
+#define AFS_CELL_FL_NO_GC	0		/* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_DO_LOOKUP	1		/* DNS lookup requested */
+#define AFS_CELL_FL_CHECK_ALIAS	2		/* Need to check for aliases */
+	enum afs_cell_state	state;
+	short			error;
+	enum dns_record_source	dns_source:8;	/* Latest source of data from lookup */
+	enum dns_lookup_status	dns_status:8;	/* Latest status of data from lookup */
+	unsigned int		dns_lookup_count; /* Counter of DNS lookups */
+	unsigned int		debug_id;
+	unsigned int		dynroot_ino;	/* Inode numbers for dynroot (a pair) */
+
+	/* The volumes belonging to this cell */
+	struct rw_semaphore	vs_lock;	/* Lock for server->volumes */
+	struct rb_root		volumes;	/* Tree of volumes on this server */
+	struct hlist_head	proc_volumes;	/* procfs volume list */
+	seqlock_t		volume_lock;	/* For volumes */
+
+	/* Active fileserver interaction state. */
+	struct rb_root		fs_servers;	/* afs_server (by server UUID) */
+	struct rw_semaphore	fs_lock;	/* For fs_servers  */
+
+	/* VL server list. */
+	rwlock_t		vl_servers_lock; /* Lock on vl_servers */
+	struct afs_vlserver_list __rcu *vl_servers;
+
+	u8			name_len;	/* Length of name */
+	char			*name;		/* Cell name, case-flattened and NUL-padded */
+	char			*key_desc;	/* Authentication key description */
+};
 
-	/* server record management */
-	rwlock_t		servers_lock;	/* active server list lock */
-	struct list_head	servers;	/* active server list */
+/*
+ * Volume Location server record.
+ */
+struct afs_vlserver {
+	struct rcu_head		rcu;
+	struct afs_addr_list	__rcu *addresses; /* List of addresses for this VL server */
+	unsigned long		flags;
+#define AFS_VLSERVER_FL_PROBED	0		/* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING	1		/* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS	2		/* Server is YFS not AFS */
+#define AFS_VLSERVER_FL_RESPONDING 3		/* VL server is responding */
+	rwlock_t		lock;		/* Lock on addresses */
+	refcount_t		ref;
+	unsigned int		rtt;		/* Server's current RTT in uS */
+	unsigned int		debug_id;
+
+	/* Probe state */
+	wait_queue_head_t	probe_wq;
+	atomic_t		probe_outstanding;
+	spinlock_t		probe_lock;
+	struct {
+		unsigned int	rtt;		/* Best RTT in uS (or UINT_MAX) */
+		u32		abort_code;
+		short		error;
+		unsigned short	flags;
+#define AFS_VLSERVER_PROBE_RESPONDED		0x01 /* At least once response (may be abort) */
+#define AFS_VLSERVER_PROBE_IS_YFS		0x02 /* The peer appears to be YFS */
+#define AFS_VLSERVER_PROBE_NOT_YFS		0x04 /* The peer appears not to be YFS */
+#define AFS_VLSERVER_PROBE_LOCAL_FAILURE	0x08 /* A local failure prevented a probe */
+	} probe;
+
+	u16			service_id;	/* Service ID we're using */
+	u16			port;
+	u16			name_len;	/* Length of name */
+	char			name[];		/* Server name, case-flattened */
+};
 
-	/* volume location record management */
-	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
-	struct list_head	vl_list;	/* cell's active VL record list */
-	spinlock_t		vl_lock;	/* vl_list lock */
-	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
-	unsigned short		vl_curr_svix;	/* current server index */
-	struct in_addr		vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
+/*
+ * Weighted list of Volume Location servers.
+ */
+struct afs_vlserver_entry {
+	u16			priority;	/* Preference (as SRV) */
+	u16			weight;		/* Weight (as SRV) */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
+	struct afs_vlserver	*server;
+};
 
-	char			name[0];	/* cell name - must go last */
+struct afs_vlserver_list {
+	struct rcu_head		rcu;
+	refcount_t		ref;
+	u8			nr_servers;
+	u8			index;		/* Server currently in use */
+	u8			preferred;	/* Preferred server */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
+	rwlock_t		lock;
+	struct afs_vlserver_entry servers[];
 };
 
 /*
- * entry in the cached volume location catalogue
+ * Cached VLDB entry.
+ *
+ * This is pointed to by cell->vldb_entries, indexed by name.
  */
-struct afs_cache_vlocation {
-	/* volume name (lowercase, padded with NULs) */
-	uint8_t			name[AFS_MAXVOLNAME + 1];
+struct afs_vldb_entry {
+	afs_volid_t		vid[3];		/* Volume IDs for R/W, R/O and Bak volumes */
 
-	uint8_t			nservers;	/* number of entries used in servers[] */
-	uint8_t			vidmask;	/* voltype mask for vid[] */
-	uint8_t			srvtmask[8];	/* voltype masks for servers[] */
+	unsigned long		flags;
+#define AFS_VLDB_HAS_RW		0		/* - R/W volume exists */
+#define AFS_VLDB_HAS_RO		1		/* - R/O volume exists */
+#define AFS_VLDB_HAS_BAK	2		/* - Backup volume exists */
+#define AFS_VLDB_QUERY_VALID	3		/* - Record is valid */
+#define AFS_VLDB_QUERY_ERROR	4		/* - VL server returned error */
+
+	uuid_t			fs_server[AFS_NMAXNSERVERS];
+	u32			addr_version[AFS_NMAXNSERVERS]; /* Registration change counters */
+	u8			fs_mask[AFS_NMAXNSERVERS];
 #define AFS_VOL_VTM_RW	0x01 /* R/W version of the volume is available (on this server) */
 #define AFS_VOL_VTM_RO	0x02 /* R/O version of the volume is available (on this server) */
 #define AFS_VOL_VTM_BAK	0x04 /* backup version of the volume is available (on this server) */
-
-	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
-	struct in_addr		servers[8];	/* fileserver addresses */
-	time_t			rtime;		/* last retrieval time */
+	u8			vlsf_flags[AFS_NMAXNSERVERS];
+	short			error;
+	u8			nr_servers;	/* Number of server records */
+	u8			name_len;
+	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
 };
 
 /*
- * volume -> vnode hash table entry
- */
-struct afs_cache_vhash {
-	afs_voltype_t		vtype;		/* which volume variation */
-	uint8_t			hash_bucket;	/* which hash bucket this represents */
-} __attribute__((packed));
-
-/*
- * AFS volume location record
+ * Fileserver endpoint state.  The records the addresses of a fileserver's
+ * endpoints and the state and result of a round of probing on them.  This
+ * allows the rotation algorithm to access those results without them being
+ * erased by a subsequent round of probing.
  */
-struct afs_vlocation {
-	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
-	struct list_head	link;		/* link in cell volume location list */
-	struct list_head	grave;		/* link in master graveyard list */
-	struct list_head	update;		/* link in master update list */
-	struct afs_cell		*cell;		/* cell to which volume belongs */
-#ifdef CONFIG_AFS_FSCACHE
-	struct fscache_cookie	*cache;		/* caching cookie */
-#endif
-	struct afs_cache_vlocation vldb;	/* volume information DB record */
-	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
-	wait_queue_head_t	waitq;		/* status change waitqueue */
-	time_t			update_at;	/* time at which record should be updated */
-	spinlock_t		lock;		/* access lock */
-	afs_vlocation_state_t	state;		/* volume location state */
-	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
-	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
-	bool			valid;		/* T if valid */
+struct afs_endpoint_state {
+	struct rcu_head		rcu;
+	struct afs_addr_list	*addresses;	/* The addresses being probed */
+	unsigned long		responsive_set;	/* Bitset of responsive endpoints */
+	unsigned long		failed_set;	/* Bitset of endpoints we failed to probe */
+	refcount_t		ref;
+	unsigned int		server_id;	/* Debug ID of server */
+	unsigned int		probe_seq;	/* Probe sequence (from server::probe_counter) */
+	atomic_t		nr_probing;	/* Number of outstanding probes */
+	unsigned int		rtt;		/* Best RTT in uS (or UINT_MAX) */
+	s32			abort_code;
+	short			error;
+	unsigned long		flags;
+#define AFS_ESTATE_RESPONDED	0		/* Set if the server responded */
+#define AFS_ESTATE_SUPERSEDED	1		/* Set if this record has been superseded */
+#define AFS_ESTATE_IS_YFS	2		/* Set if probe upgraded to YFS */
+#define AFS_ESTATE_NOT_YFS	3		/* Set if probe didn't upgrade to YFS */
+#define AFS_ESTATE_LOCAL_FAILURE 4		/* Set if there was a local failure (eg. ENOMEM) */
 };
 
 /*
- * AFS fileserver record
+ * Record of fileserver with which we're actively communicating.
  */
 struct afs_server {
-	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
-	struct in_addr		addr;		/* server address */
-	struct afs_cell		*cell;		/* cell in which server resides */
-	struct list_head	link;		/* link in cell's server list */
-	struct list_head	grave;		/* link in master graveyard list */
-	struct rb_node		master_rb;	/* link in master by-addr tree */
-	struct rw_semaphore	sem;		/* access lock */
+	struct rcu_head		rcu;
+	union {
+		uuid_t		uuid;		/* Server ID */
+		struct afs_uuid	_uuid;
+	};
+
+	struct afs_cell		*cell;		/* Cell to which belongs (pins ref) */
+	struct rb_node		uuid_rb;	/* Link in cell->fs_servers */
+	struct list_head	probe_link;	/* Link in net->fs_probe_* */
+	struct hlist_node	proc_link;	/* Link in net->fs_proc */
+	struct list_head	volumes;	/* RCU list of afs_server_entry objects */
+	struct work_struct	destroyer;	/* Work item to try and destroy a server */
+	struct timer_list	timer;		/* Management timer */
+	struct mutex		cm_token_lock;	/* Lock governing creation of appdata */
+	struct krb5_buffer	cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */
+	time64_t		unuse_time;	/* Time at which last unused */
+	unsigned long		flags;
+#define AFS_SERVER_FL_RESPONDING 0		/* The server is responding */
+#define AFS_SERVER_FL_UPDATING	1
+#define AFS_SERVER_FL_NEEDS_UPDATE 2		/* Fileserver address list is out of date */
+#define AFS_SERVER_FL_UNCREATED	3		/* The record needs creating */
+#define AFS_SERVER_FL_CREATING	4		/* The record is being created */
+#define AFS_SERVER_FL_EXPIRED	5		/* The record has expired */
+#define AFS_SERVER_FL_NOT_FOUND	6		/* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL	7		/* Failed to access VL server */
+#define AFS_SERVER_FL_MAY_HAVE_CB 8		/* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS	16		/* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_IBULK	17		/* Fileserver doesn't support FS.InlineBulkStatus */
+#define AFS_SERVER_FL_NO_RM2	18		/* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAS_FS64	19		/* Fileserver supports FS.{Fetch,Store}Data64 */
+#define AFS_SERVER_FL_NO_RENAME2 20		/* YFS Fileserver doesn't support enhanced rename */
+	refcount_t		ref;		/* Object refcount */
+	atomic_t		active;		/* Active user count */
+	u32			addr_version;	/* Address list version */
+	u16			service_id;	/* Service ID we're using. */
+	short			create_error;	/* Creation error */
+	unsigned int		rtt;		/* Server's current RTT in uS */
+	unsigned int		debug_id;	/* Debugging ID for traces */
 
 	/* file service access */
-	struct rb_root		fs_vnodes;	/* vnodes backed by this server (ordered by FID) */
-	unsigned long		fs_act_jif;	/* time at which last activity occurred */
-	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
-	spinlock_t		fs_lock;	/* access lock */
-	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
+	rwlock_t		fs_lock;	/* access lock */
+
+	/* Probe state */
+	struct afs_endpoint_state __rcu *endpoint_state; /* Latest endpoint/probe state */
+	unsigned long		probed_at;	/* Time last probe was dispatched (jiffies) */
+	wait_queue_head_t	probe_wq;
+	unsigned int		probe_counter;	/* Number of probes issued */
+	spinlock_t		probe_lock;
+};
 
-	/* callback promise management */
-	struct rb_root		cb_promises;	/* vnode expiration list (ordered earliest first) */
-	struct delayed_work	cb_updater;	/* callback updater */
-	struct delayed_work	cb_break_work;	/* collected break dispatcher */
-	wait_queue_head_t	cb_break_waitq;	/* space available in cb_break waitqueue */
-	spinlock_t		cb_lock;	/* access lock */
-	struct afs_callback	cb_break[64];	/* ring of callbacks awaiting breaking */
-	atomic_t		cb_break_n;	/* number of pending breaks */
-	u8			cb_break_head;	/* head of callback breaking ring */
-	u8			cb_break_tail;	/* tail of callback breaking ring */
+enum afs_ro_replicating {
+	AFS_RO_NOT_REPLICATING,			/* Not doing replication */
+	AFS_RO_REPLICATING_USE_OLD,		/* Replicating; use old version */
+	AFS_RO_REPLICATING_USE_NEW,		/* Replicating; switch to new version */
+} __mode(byte);
+
+/*
+ * Replaceable volume server list.
+ */
+struct afs_server_entry {
+	struct afs_server	*server;
+	struct afs_volume	*volume;
+	struct list_head	slink;		/* Link in server->volumes */
+	time64_t		cb_expires_at;	/* Time at which volume-level callback expires */
+	unsigned long		flags;
+#define AFS_SE_EXCLUDED		0		/* Set if server is to be excluded in rotation */
+#define AFS_SE_VOLUME_OFFLINE	1		/* Set if volume offline notice given */
+#define AFS_SE_VOLUME_BUSY	2		/* Set if volume busy notice given */
+};
+
+struct afs_server_list {
+	struct rcu_head		rcu;
+	refcount_t		usage;
+	bool			attached;	/* T if attached to servers */
+	enum afs_ro_replicating	ro_replicating;	/* RW->RO update (probably) in progress */
+	unsigned char		nr_servers;
+	unsigned short		vnovol_mask;	/* Servers to be skipped due to VNOVOL */
+	unsigned int		seq;		/* Set to ->servers_seq when installed */
+	rwlock_t		lock;
+	struct afs_server_entry	servers[];
 };
 
 /*
- * AFS volume access record
+ * Live AFS volume management.
  */
 struct afs_volume {
-	atomic_t		usage;
-	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
-	struct afs_vlocation	*vlocation;	/* volume location */
+	struct rcu_head	rcu;
+	afs_volid_t		vid;		/* The volume ID of this volume */
+	afs_volid_t		vids[AFS_MAXTYPES]; /* All associated volume IDs */
+	refcount_t		ref;
+	unsigned int		debug_id;	/* Debugging ID for traces */
+	time64_t		update_at;	/* Time at which to next update */
+	struct afs_cell		*cell;		/* Cell to which belongs (pins ref) */
+	struct rb_node		cell_node;	/* Link in cell->volumes */
+	struct hlist_node	proc_link;	/* Link in cell->proc_volumes */
+	struct super_block __rcu *sb;		/* Superblock on which inodes reside */
+	struct work_struct	destructor;	/* Deferred destructor */
+	unsigned long		flags;
+#define AFS_VOLUME_NEEDS_UPDATE	0	/* - T if an update needs performing */
+#define AFS_VOLUME_UPDATING	1	/* - T if an update is in progress */
+#define AFS_VOLUME_WAIT		2	/* - T if users must wait for update */
+#define AFS_VOLUME_DELETED	3	/* - T if volume appears deleted */
+#define AFS_VOLUME_MAYBE_NO_IBULK 4	/* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE	5	/* - Set if volume removed from cell->volumes */
 #ifdef CONFIG_AFS_FSCACHE
-	struct fscache_cookie	*cache;		/* caching cookie */
+	struct fscache_volume	*cache;		/* Caching cookie */
 #endif
-	afs_volid_t		vid;		/* volume ID */
+	struct afs_server_list __rcu *servers;	/* List of servers on which volume resides */
+	rwlock_t		servers_lock;	/* Lock for ->servers */
+	unsigned int		servers_seq;	/* Incremented each time ->servers changes */
+
+	/* RO release tracking */
+	struct mutex		volsync_lock;	/* Time/state evaluation lock */
+	time64_t		creation_time;	/* Volume creation time (or TIME64_MIN) */
+	time64_t		update_time;	/* Volume update time (or TIME64_MIN) */
+
+	/* Callback management */
+	struct mutex		cb_check_lock;	/* Lock to control race to check after v_break */
+	time64_t		cb_expires_at;	/* Earliest volume callback expiry time */
+	atomic_t		cb_ro_snapshot;	/* RO volume update-from-snapshot counter */
+	atomic_t		cb_v_break;	/* Volume-break event counter. */
+	atomic_t		cb_v_check;	/* Volume-break has-been-checked counter. */
+	atomic_t		cb_scrub;	/* Scrub-all-data event counter. */
+	rwlock_t		cb_v_break_lock;
+	struct rw_semaphore	open_mmaps_lock;
+	struct list_head	open_mmaps;	/* List of vnodes that are mmapped */
+
 	afs_voltype_t		type;		/* type of volume */
 	char			type_force;	/* force volume type (suppress R/O -> R/W) */
-	unsigned short		nservers;	/* number of server slots filled */
-	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
-	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
-	struct rw_semaphore	server_sem;	/* lock for accessing current server */
-	struct backing_dev_info	bdi;
+	u8			name_len;
+	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
 };
 
-/*
- * vnode catalogue entry
- */
-struct afs_cache_vnode {
-	afs_vnodeid_t		vnode_id;	/* vnode ID */
-	unsigned		vnode_unique;	/* vnode ID uniquifier */
-	afs_dataversion_t	data_version;	/* data version */
+enum afs_lock_state {
+	AFS_VNODE_LOCK_NONE,		/* The vnode has no lock on the server */
+	AFS_VNODE_LOCK_WAITING_FOR_CB,	/* We're waiting for the server to break the callback */
+	AFS_VNODE_LOCK_SETTING,		/* We're asking the server for a lock */
+	AFS_VNODE_LOCK_GRANTED,		/* We have a lock on the server */
+	AFS_VNODE_LOCK_EXTENDING,	/* We're extending a lock on the server */
+	AFS_VNODE_LOCK_NEED_UNLOCK,	/* We need to unlock on the server */
+	AFS_VNODE_LOCK_UNLOCKING,	/* We're telling the server to unlock */
+	AFS_VNODE_LOCK_DELETED,		/* The vnode has been deleted whilst we have a lock */
 };
 
 /*
- * AFS inode private data
+ * AFS inode private data.
+ *
+ * Note that afs_alloc_inode() *must* reset anything that could incorrectly
+ * leak from one inode to another.
  */
 struct afs_vnode {
-	struct inode		vfs_inode;	/* the VFS's inode record */
-
+	struct netfs_inode	netfs;		/* Netfslib context and vfs inode */
 	struct afs_volume	*volume;	/* volume on which vnode resides */
-	struct afs_server	*server;	/* server currently supplying this file */
 	struct afs_fid		fid;		/* the file identifier for this inode */
 	struct afs_file_status	status;		/* AFS status info for this file */
-#ifdef CONFIG_AFS_FSCACHE
-	struct fscache_cookie	*cache;		/* caching cookie */
-#endif
-	struct afs_permits	*permits;	/* cache of permits so far obtained */
-	struct mutex		permits_lock;	/* lock for altering permits list */
-	struct mutex		validate_lock;	/* lock for validating this vnode */
-	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
-	int			update_cnt;	/* number of outstanding ops that will update the
-						 * status */
-	spinlock_t		writeback_lock;	/* lock for writebacks */
+	afs_dataversion_t	invalid_before;	/* Child dentries are invalid before this */
+	struct afs_permits __rcu *permit_cache;	/* cache of permits so far obtained */
+	struct list_head	io_lock_waiters; /* Threads waiting for the I/O lock */
+	struct rw_semaphore	validate_lock;	/* lock for validating this vnode */
+	struct rw_semaphore	rmdir_lock;	/* Lock for rmdir vs sillyrename */
+	struct key		*silly_key;	/* Silly rename key */
+	spinlock_t		wb_lock;	/* lock for wb_keys */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
-#define AFS_VNODE_CB_BROKEN	0		/* set if vnode's callback was broken */
+#define AFS_VNODE_IO_LOCK	0		/* Set if the I/O serialisation lock is held */
 #define AFS_VNODE_UNSET		1		/* set if vnode attributes not yet set */
-#define AFS_VNODE_MODIFIED	2		/* set if vnode's data modified */
+#define AFS_VNODE_DIR_VALID	2		/* Set if dir contents are valid */
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_LOCKING	6		/* set if waiting for lock on vnode */
-#define AFS_VNODE_READLOCKED	7		/* set if vnode is read-locked on the server */
-#define AFS_VNODE_WRITELOCKED	8		/* set if vnode is write-locked on the server */
-#define AFS_VNODE_UNLOCKING	9		/* set if vnode is being unlocked on the server */
-#define AFS_VNODE_AUTOCELL	10		/* set if Vnode is an auto mount point */
-#define AFS_VNODE_PSEUDODIR	11		/* set if Vnode is a pseudo directory */
-
-	long			acl_order;	/* ACL check count (callback break count) */
-
-	struct list_head	writebacks;	/* alterations in pagecache that need writing */
+#define AFS_VNODE_PSEUDODIR	7 		/* set if Vnode is a pseudo directory */
+#define AFS_VNODE_NEW_CONTENT	8		/* Set if file has new content (create/trunc-0) */
+#define AFS_VNODE_SILLY_DELETED	9		/* Set if file has been silly-deleted */
+#define AFS_VNODE_MODIFYING	10		/* Set if we're performing a modification op */
+#define AFS_VNODE_DIR_READ	11		/* Set if we've read a dir's contents */
+
+	struct folio_queue	*directory;	/* Directory contents */
+	struct list_head	wb_keys;	/* List of keys available for writeback */
 	struct list_head	pending_locks;	/* locks waiting to be granted */
 	struct list_head	granted_locks;	/* locks granted on this file */
 	struct delayed_work	lock_work;	/* work to be done in locking */
-	struct key		*unlock_key;	/* key to be used in unlocking */
+	struct key		*lock_key;	/* Key to be used in lock ops */
+	ktime_t			locked_at;	/* Time at which lock obtained */
+	enum afs_lock_state	lock_state : 8;
+	afs_lock_type_t		lock_type : 8;
+	unsigned int		directory_size;	/* Amount of space in ->directory */
 
 	/* outstanding callback notification on this file */
-	struct rb_node		server_rb;	/* link in server->fs_vnodes */
-	struct rb_node		cb_promise;	/* link in server->cb_promises */
-	struct work_struct	cb_broken_work;	/* work to be done on callback break */
-	time_t			cb_expires;	/* time at which callback expires */
-	time_t			cb_expires_at;	/* time used to order cb_promise */
-	unsigned		cb_version;	/* callback version */
-	unsigned		cb_expiry;	/* callback expiry time */
-	afs_callback_type_t	cb_type;	/* type of callback */
-	bool			cb_promised;	/* true if promise still holds */
+	struct work_struct	cb_work;	/* Work for mmap'd files */
+	struct list_head	cb_mmap_link;	/* Link in cell->fs_open_mmaps */
+	void			*cb_server;	/* Server with callback/filelock */
+	atomic_t		cb_nr_mmap;	/* Number of mmaps */
+	unsigned int		cb_ro_snapshot;	/* RO volume release counter on ->volume */
+	unsigned int		cb_scrub;	/* Scrub counter on ->volume */
+	unsigned int		cb_break;	/* Break counter on vnode */
+	unsigned int		cb_v_check;	/* Break check counter on ->volume */
+	seqlock_t		cb_lock;	/* Lock for ->cb_server, ->status, ->cb_*break */
+
+	atomic64_t		cb_expires_at;	/* time at which callback expires */
+#define AFS_NO_CB_PROMISE TIME64_MIN
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	return netfs_i_cookie(&vnode->netfs);
+#else
+	return NULL;
+#endif
+}
+
+static inline void afs_vnode_set_cache(struct afs_vnode *vnode,
+				       struct fscache_cookie *cookie)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	vnode->netfs.cache = cookie;
+	if (cookie)
+		mapping_set_release_always(vnode->netfs.inode.i_mapping);
+#endif
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
 struct afs_permit {
 	struct key		*key;		/* RxRPC ticket holding a security context */
-	afs_access_t		access_mask;	/* access mask for this key */
+	afs_access_t		access;		/* CallerAccess value for this key */
 };
 
 /*
- * cache of security records from attempts to access a vnode
+ * Immutable cache of CallerAccess records from attempts to access vnodes.
+ * These may be shared between multiple vnodes.
  */
 struct afs_permits {
-	struct rcu_head		rcu;		/* disposal procedure */
-	int			count;		/* number of records */
-	struct afs_permit	permits[0];	/* the permits so far examined */
+	struct rcu_head		rcu;
+	struct hlist_node	hash_node;	/* Link in hash */
+	unsigned long		h;		/* Hash value for this permit list */
+	refcount_t		usage;
+	unsigned short		nr_permits;	/* Number of records */
+	bool			invalidated;	/* Invalidated due to key change */
+	struct afs_permit	permits[] __counted_by(nr_permits);	/* List of permits sorted by key pointer */
+};
+
+/*
+ * Error prioritisation and accumulation.
+ */
+struct afs_error {
+	s32	abort_code;		/* Cumulative abort code */
+	short	error;			/* Cumulative error */
+	bool	responded;		/* T if server responded */
+	bool	aborted;		/* T if ->error is from an abort */
 };
 
 /*
- * record of one of a system's set of network interfaces
+ * Cursor for iterating over a set of volume location servers.
  */
-struct afs_interface {
-	struct in_addr	address;	/* IPv4 address bound to interface */
-	struct in_addr	netmask;	/* netmask applied to address */
-	unsigned	mtu;		/* MTU of interface */
+struct afs_vl_cursor {
+	struct afs_cell		*cell;		/* The cell we're querying */
+	struct afs_vlserver_list *server_list;	/* Current server list (pins ref) */
+	struct afs_vlserver	*server;	/* Server on which this resides */
+	struct afs_addr_list	*alist;		/* Current address list (pins ref) */
+	struct key		*key;		/* Key for the server */
+	unsigned long		untried_servers; /* Bitmask of untried servers */
+	unsigned long		addr_tried;	/* Tried addresses */
+	struct afs_error	cumul_error;	/* Cumulative error */
+	unsigned int		debug_id;
+	s32			call_abort_code;
+	short			call_error;	/* Error from single call */
+	short			server_index;	/* Current server */
+	signed char		addr_index;	/* Current address */
+	unsigned short		flags;
+#define AFS_VL_CURSOR_STOP	0x0001		/* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY	0x0002		/* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED	0x0004		/* Set if started a retry */
+	short			nr_iterations;	/* Number of server iterations */
+	bool			call_responded;	/* T if the current address responded */
+};
+
+/*
+ * Fileserver state tracking for an operation.  An array of these is kept,
+ * indexed by server index.
+ */
+struct afs_server_state {
+	/* Tracking of fileserver probe state.  Other operations may interfere
+	 * by probing a fileserver when accessing other volumes.
+	 */
+	unsigned int		probe_seq;
+	unsigned long		untried_addrs;	/* Addresses we haven't tried yet */
+	struct wait_queue_entry	probe_waiter;
+	struct afs_endpoint_state *endpoint_state; /* Endpoint state being monitored */
 };
 
 /*
- * UUID definition [internet draft]
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- *   increments since midnight 15th October 1582
- *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- *     time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
+ * Fileserver operation methods.
  */
-struct afs_uuid {
-	u32		time_low;			/* low part of timestamp */
-	u16		time_mid;			/* mid part of timestamp */
-	u16		time_hi_and_version;		/* high part of timestamp and version  */
-#define AFS_UUID_TO_UNIX_TIME	0x01b21dd213814000ULL
-#define AFS_UUID_TIMEHI_MASK	0x0fff
-#define AFS_UUID_VERSION_TIME	0x1000	/* time-based UUID */
-#define AFS_UUID_VERSION_NAME	0x3000	/* name-based UUID */
-#define AFS_UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
-	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-#define AFS_UUID_CLOCKHI_MASK	0x3f
-#define AFS_UUID_VARIANT_STD	0x80
-	u8		clock_seq_low;			/* clock seq low */
-	u8		node[6];			/* spatially unique node ID (MAC addr) */
+struct afs_operation_ops {
+	void (*issue_afs_rpc)(struct afs_operation *op);
+	void (*issue_yfs_rpc)(struct afs_operation *op);
+	void (*success)(struct afs_operation *op);
+	void (*aborted)(struct afs_operation *op);
+	void (*failed)(struct afs_operation *op);
+	void (*edit_dir)(struct afs_operation *op);
+	void (*put)(struct afs_operation *op);
 };
 
+struct afs_vnode_param {
+	struct afs_vnode	*vnode;
+	struct afs_fid		fid;		/* Fid to access */
+	struct afs_status_cb	scb;		/* Returned status and callback promise */
+	afs_dataversion_t	dv_before;	/* Data version before the call */
+	unsigned int		cb_break_before; /* cb_break before the call */
+	u8			dv_delta;	/* Expected change in data version */
+	bool			put_vnode:1;	/* T if we have a ref on the vnode */
+	bool			need_io_lock:1;	/* T if we need the I/O lock on this */
+	bool			update_ctime:1;	/* Need to update the ctime */
+	bool			set_size:1;	/* Must update i_size */
+	bool			op_unlinked:1;	/* True if file was unlinked by op */
+	bool			speculative:1;	/* T if speculative status fetch (no vnode lock) */
+	bool			modification:1;	/* Set if the content gets modified */
+};
+
+/*
+ * Fileserver operation wrapper, handling server and address rotation
+ * asynchronously.  May make simultaneous calls to multiple servers.
+ */
+struct afs_operation {
+	struct afs_net		*net;		/* Network namespace */
+	struct key		*key;		/* Key for the cell */
+	const struct afs_call_type *type;	/* Type of call done */
+	const struct afs_operation_ops *ops;
+
+	/* Parameters/results for the operation */
+	struct afs_volume	*volume;	/* Volume being accessed */
+	struct afs_vnode_param	file[2];
+	struct afs_vnode_param	*more_files;
+	struct afs_volsync	pre_volsync;	/* Volsync before op */
+	struct afs_volsync	volsync;	/* Volsync returned by op */
+	struct dentry		*dentry;	/* Dentry to be altered */
+	struct dentry		*dentry_2;	/* Second dentry to be altered */
+	struct timespec64	mtime;		/* Modification time to record */
+	struct timespec64	ctime;		/* Change time to set */
+	struct afs_error	cumul_error;	/* Cumulative error */
+	short			nr_files;	/* Number of entries in file[], more_files */
+	unsigned int		debug_id;
+
+	unsigned int		cb_v_break;	/* Volume break counter before op */
+
+	union {
+		struct {
+			int	which;		/* Which ->file[] to fetch for */
+		} fetch_status;
+		struct {
+			int	reason;		/* enum afs_edit_dir_reason */
+			mode_t	mode;
+			const char *symlink;
+		} create;
+		struct {
+			bool	need_rehash;
+		} unlink;
+		struct {
+			struct dentry	*rehash;
+			struct dentry	*tmp;
+			unsigned int	rename_flags;
+			bool		new_negative;
+		} rename;
+		struct {
+			struct netfs_io_subrequest *subreq;
+		} fetch;
+		struct {
+			afs_lock_type_t type;
+		} lock;
+		struct {
+			struct iov_iter	*write_iter;
+			loff_t	pos;
+			loff_t	size;
+			loff_t	i_size;
+		} store;
+		struct {
+			struct iattr	*attr;
+			loff_t		old_i_size;
+		} setattr;
+		struct afs_acl	*acl;
+		struct yfs_acl	*yacl;
+		struct {
+			struct afs_volume_status vs;
+			struct kstatfs		*buf;
+		} volstatus;
+	};
+
+	/* Fileserver iteration state */
+	struct afs_server_list	*server_list;	/* Current server list (pins ref) */
+	struct afs_server	*server;	/* Server we're using (ref pinned by server_list) */
+	struct afs_endpoint_state *estate;	/* Current endpoint state (doesn't pin ref) */
+	struct afs_server_state	*server_states;	/* States of the servers involved */
+	struct afs_call		*call;
+	unsigned long		untried_servers; /* Bitmask of untried servers */
+	unsigned long		addr_tried;	/* Tried addresses */
+	s32			call_abort_code; /* Abort code from single call */
+	short			call_error;	/* Error from single call */
+	short			server_index;	/* Current server */
+	short			nr_iterations;	/* Number of server iterations */
+	signed char		addr_index;	/* Current address */
+	bool			call_responded;	/* T if the current address responded */
+
+	unsigned int		flags;
+#define AFS_OPERATION_STOP		0x0001	/* Set to cease iteration */
+#define AFS_OPERATION_VBUSY		0x0002	/* Set if seen VBUSY */
+#define AFS_OPERATION_VMOVED		0x0004	/* Set if seen VMOVED */
+#define AFS_OPERATION_VNOVOL		0x0008	/* Set if seen VNOVOL */
+#define AFS_OPERATION_CUR_ONLY		0x0010	/* Set if current server only (file lock held) */
+#define AFS_OPERATION_NO_VSLEEP		0x0020	/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_OPERATION_UNINTR		0x0040	/* Set if op is uninterruptible */
+#define AFS_OPERATION_DOWNGRADE		0x0080	/* Set to retry with downgraded opcode */
+#define AFS_OPERATION_LOCK_0		0x0100	/* Set if have io_lock on file[0] */
+#define AFS_OPERATION_LOCK_1		0x0200	/* Set if have io_lock on file[1] */
+#define AFS_OPERATION_TRIED_ALL		0x0400	/* Set if we've tried all the fileservers */
+#define AFS_OPERATION_RETRY_SERVER	0x0800	/* Set if we should retry the current server */
+#define AFS_OPERATION_DIR_CONFLICT	0x1000	/* Set if we detected a 3rd-party dir change */
+#define AFS_OPERATION_ASYNC		0x2000	/* Set if should run asynchronously */
+};
+
+/*
+ * Cache auxiliary data.
+ */
+struct afs_vnode_cache_aux {
+	__be64			data_version;
+} __packed;
+
+static inline void afs_set_cache_aux(struct afs_vnode *vnode,
+				     struct afs_vnode_cache_aux *aux)
+{
+	aux->data_version = cpu_to_be64(vnode->status.data_version);
+}
+
+static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int flags)
+{
+	struct afs_vnode_cache_aux aux;
+
+	afs_set_cache_aux(vnode, &aux);
+	fscache_invalidate(afs_vnode_cache(vnode), &aux,
+			   i_size_read(&vnode->netfs.inode), flags);
+}
+
+/*
+ * Directory iteration management.
+ */
+struct afs_dir_iter {
+	struct afs_vnode	*dvnode;
+	union afs_xdr_dir_block *block;
+	struct folio_queue	*fq;
+	unsigned int		fpos;
+	int			fq_slot;
+	unsigned int		loop_check;
+	u8			nr_slots;
+	u8			bucket;
+	unsigned int		prev_entry;
+};
+
+#include <trace/events/afs.h>
+
 /*****************************************************************************/
 /*
- * cache.c
+ * addr_list.c
  */
-#ifdef CONFIG_AFS_FSCACHE
-extern struct fscache_netfs afs_cache_netfs;
-extern struct fscache_cookie_def afs_cell_cache_index_def;
-extern struct fscache_cookie_def afs_vlocation_cache_index_def;
-extern struct fscache_cookie_def afs_volume_cache_index_def;
-extern struct fscache_cookie_def afs_vnode_cache_index_def;
-#else
-#define afs_cell_cache_index_def	(*(struct fscache_cookie_def *) NULL)
-#define afs_vlocation_cache_index_def	(*(struct fscache_cookie_def *) NULL)
-#define afs_volume_cache_index_def	(*(struct fscache_cookie_def *) NULL)
-#define afs_vnode_cache_index_def	(*(struct fscache_cookie_def *) NULL)
-#endif
+struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int nr);
+extern void afs_put_addrlist(struct afs_addr_list *alist, enum afs_alist_trace reason);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+						      const char *, size_t, char,
+						      unsigned short, unsigned short);
+bool afs_addr_list_same(const struct afs_addr_list *a,
+			const struct afs_addr_list *b);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
+
+extern int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *addr,
+			      __be32 xdr, u16 port);
+extern int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *addr,
+			      __be32 *xdr, u16 port);
+void afs_set_peer_appdata(struct afs_server *server,
+			  struct afs_addr_list *old_alist,
+			  struct afs_addr_list *new_alist);
+
+/*
+ * addr_prefs.c
+ */
+int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size);
+void afs_get_address_preferences_rcu(struct afs_net *net, struct afs_addr_list *alist);
+void afs_get_address_preferences(struct afs_net *net, struct afs_addr_list *alist);
 
 /*
  * callback.c
  */
+extern void afs_invalidate_mmap_work(struct work_struct *);
 extern void afs_init_callback_state(struct afs_server *);
-extern void afs_broken_callback_work(struct work_struct *);
-extern void afs_break_callbacks(struct afs_server *, size_t,
-				struct afs_callback[]);
-extern void afs_discard_callback_on_delete(struct afs_vnode *);
-extern void afs_give_up_callback(struct afs_vnode *);
-extern void afs_dispatch_give_up_callbacks(struct work_struct *);
-extern void afs_flush_callback_breaks(struct afs_server *);
-extern int __init afs_callback_update_init(void);
-extern void afs_callback_update_kill(void);
+extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *);
+
+static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
+{
+	return vnode->cb_break + vnode->cb_ro_snapshot + vnode->cb_scrub;
+}
+
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+				    const struct afs_vnode *vnode)
+{
+	return cb_break != (vnode->cb_break +
+			    atomic_read(&vnode->volume->cb_ro_snapshot) +
+			    atomic_read(&vnode->volume->cb_scrub));
+}
 
 /*
  * cell.c
  */
-extern struct rw_semaphore afs_proc_cells_sem;
-extern struct list_head afs_proc_cells;
-
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
-extern int afs_cell_init(char *);
-extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool);
-extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool);
-extern struct afs_cell *afs_grab_cell(struct afs_cell *);
-extern void afs_put_cell(struct afs_cell *);
-extern void afs_cell_purge(void);
+extern int afs_cell_init(struct afs_net *, const char *);
+extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
+				      enum afs_cell_trace);
+enum afs_lookup_cell_for {
+	AFS_LOOKUP_CELL_DYNROOT,
+	AFS_LOOKUP_CELL_MOUNTPOINT,
+	AFS_LOOKUP_CELL_DIRECT_MOUNT,
+	AFS_LOOKUP_CELL_PRELOAD,
+	AFS_LOOKUP_CELL_ROOTCELL,
+	AFS_LOOKUP_CELL_ALIAS_CHECK,
+};
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+				 const char *name, unsigned int namesz,
+				 const char *vllist,
+				 enum afs_lookup_cell_for reason,
+				 enum afs_cell_trace trace);
+extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
+void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason);
+extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
+extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
+void afs_set_cell_timer(struct afs_cell *cell, unsigned int delay_secs);
+extern void __net_exit afs_cell_purge(struct afs_net *);
 
 /*
  * cmservice.c
@@ -483,27 +1079,92 @@ extern void afs_cell_purge(void);
 extern bool afs_cm_incoming_call(struct afs_call *);
 
 /*
+ * cm_security.c
+ */
+void afs_process_oob_queue(struct work_struct *work);
+#ifdef CONFIG_RXGK
+int afs_create_token_key(struct afs_net *net, struct socket *socket);
+#else
+static inline int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+	return 0;
+}
+#endif
+
+/*
  * dir.c
  */
+extern const struct file_operations afs_dir_file_operations;
 extern const struct inode_operations afs_dir_inode_operations;
+extern const struct address_space_operations afs_dir_aops;
 extern const struct dentry_operations afs_fs_dentry_operations;
-extern const struct file_operations afs_dir_file_operations;
+
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+	__acquires(&dvnode->validate_lock);
+extern void afs_d_release(struct dentry *);
+extern void afs_check_for_remote_deletion(struct afs_operation *);
+int afs_single_writepages(struct address_space *mapping,
+			  struct writeback_control *wbc);
+
+/*
+ * dir_edit.c
+ */
+extern void afs_edit_dir_add(struct afs_vnode *, const struct qstr *, struct afs_fid *,
+			     enum afs_edit_dir_reason);
+extern void afs_edit_dir_remove(struct afs_vnode *, const struct qstr *, enum afs_edit_dir_reason);
+void afs_edit_dir_update(struct afs_vnode *vnode, const struct qstr *name,
+			 struct afs_vnode *new_dvnode, enum afs_edit_dir_reason why);
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
+
+/*
+ * dir_search.c
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name);
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+			  struct afs_fid *_fid);
+int afs_dir_search(struct afs_vnode *dvnode, const struct qstr *name,
+		   struct afs_fid *_fid, afs_dataversion_t *_dir_version);
+
+/*
+ * dir_silly.c
+ */
+extern int afs_sillyrename(struct afs_vnode *, struct afs_vnode *,
+			   struct dentry *, struct key *);
+extern int afs_silly_iput(struct dentry *, struct inode *);
+
+/*
+ * dynroot.c
+ */
+extern const struct inode_operations afs_dynroot_inode_operations;
+extern const struct dentry_operations afs_dynroot_dentry_operations;
+
+struct inode *afs_dynroot_iget_root(struct super_block *sb);
 
 /*
  * file.c
  */
-extern const struct address_space_operations afs_fs_aops;
+extern const struct address_space_operations afs_file_aops;
 extern const struct inode_operations afs_file_inode_operations;
 extern const struct file_operations afs_file_operations;
+extern const struct afs_operation_ops afs_fetch_data_operation;
+extern const struct netfs_request_ops afs_req_ops;
 
+extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
+extern void afs_put_wb_key(struct afs_wb_key *);
 extern int afs_open(struct inode *, struct file *);
 extern int afs_release(struct inode *, struct file *);
-extern int afs_page_filler(void *, struct page *);
+void afs_fetch_data_async_rx(struct work_struct *work);
+void afs_fetch_data_immediate_cancel(struct afs_call *call);
 
 /*
  * flock.c
  */
-extern void __exit afs_kill_lock_manager(void);
+extern struct workqueue_struct *afs_lock_manager;
+
+extern void afs_lock_op_done(struct afs_call *);
 extern void afs_lock_work(struct work_struct *);
 extern void afs_lock_may_be_available(struct afs_vnode *);
 extern int afs_lock(struct file *, int, struct file_lock *);
@@ -512,65 +1173,92 @@ extern int afs_flock(struct file *, int, struct file_lock *);
 /*
  * fsclient.c
  */
-extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
-				    struct afs_vnode *, struct afs_volsync *,
-				    const struct afs_wait_mode *);
-extern int afs_fs_give_up_callbacks(struct afs_server *,
-				    const struct afs_wait_mode *);
-extern int afs_fs_fetch_data(struct afs_server *, struct key *,
-			     struct afs_vnode *, off_t, size_t, struct page *,
-			     const struct afs_wait_mode *);
-extern int afs_fs_create(struct afs_server *, struct key *,
-			 struct afs_vnode *, const char *, umode_t,
-			 struct afs_fid *, struct afs_file_status *,
-			 struct afs_callback *,
-			 const struct afs_wait_mode *);
-extern int afs_fs_remove(struct afs_server *, struct key *,
-			 struct afs_vnode *, const char *, bool,
-			 const struct afs_wait_mode *);
-extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
-		       struct afs_vnode *, const char *,
-		       const struct afs_wait_mode *);
-extern int afs_fs_symlink(struct afs_server *, struct key *,
-			  struct afs_vnode *, const char *, const char *,
-			  struct afs_fid *, struct afs_file_status *,
-			  const struct afs_wait_mode *);
-extern int afs_fs_rename(struct afs_server *, struct key *,
-			 struct afs_vnode *, const char *,
-			 struct afs_vnode *, const char *,
-			 const struct afs_wait_mode *);
-extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
-			     pgoff_t, pgoff_t, unsigned, unsigned,
-			     const struct afs_wait_mode *);
-extern int afs_fs_setattr(struct afs_server *, struct key *,
-			  struct afs_vnode *, struct iattr *,
-			  const struct afs_wait_mode *);
-extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
-				    struct afs_vnode *,
-				    struct afs_volume_status *,
-				    const struct afs_wait_mode *);
-extern int afs_fs_set_lock(struct afs_server *, struct key *,
-			   struct afs_vnode *, afs_lock_type_t,
-			   const struct afs_wait_mode *);
-extern int afs_fs_extend_lock(struct afs_server *, struct key *,
-			      struct afs_vnode *,
-			      const struct afs_wait_mode *);
-extern int afs_fs_release_lock(struct afs_server *, struct key *,
-			       struct afs_vnode *,
-			       const struct afs_wait_mode *);
+extern void afs_fs_fetch_status(struct afs_operation *);
+extern void afs_fs_fetch_data(struct afs_operation *);
+extern void afs_fs_create_file(struct afs_operation *);
+extern void afs_fs_make_dir(struct afs_operation *);
+extern void afs_fs_remove_file(struct afs_operation *);
+extern void afs_fs_remove_dir(struct afs_operation *);
+extern void afs_fs_link(struct afs_operation *);
+extern void afs_fs_symlink(struct afs_operation *);
+extern void afs_fs_rename(struct afs_operation *);
+extern void afs_fs_store_data(struct afs_operation *);
+extern void afs_fs_setattr(struct afs_operation *);
+extern void afs_fs_get_volume_status(struct afs_operation *);
+extern void afs_fs_set_lock(struct afs_operation *);
+extern void afs_fs_extend_lock(struct afs_operation *);
+extern void afs_fs_release_lock(struct afs_operation *);
+int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
+				 struct afs_address *addr, struct key *key);
+bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
+			     struct afs_endpoint_state *estate, unsigned int addr_index,
+			     struct key *key);
+extern void afs_fs_inline_bulk_status(struct afs_operation *);
+
+struct afs_acl {
+	u32	size;
+	u8	data[] __counted_by(size);
+};
+
+extern void afs_fs_fetch_acl(struct afs_operation *);
+extern void afs_fs_store_acl(struct afs_operation *);
+
+/*
+ * fs_operation.c
+ */
+extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
+extern int afs_put_operation(struct afs_operation *);
+extern bool afs_begin_vnode_operation(struct afs_operation *);
+extern void afs_end_vnode_operation(struct afs_operation *op);
+extern void afs_wait_for_operation(struct afs_operation *);
+extern int afs_do_sync_operation(struct afs_operation *);
+
+static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n,
+				    struct afs_vnode *vnode)
+{
+	op->file[n].vnode = vnode;
+	op->file[n].need_io_lock = true;
+}
+
+static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n,
+				  const struct afs_fid *fid)
+{
+	op->file[n].fid = *fid;
+}
+
+/*
+ * fs_probe.c
+ */
+struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
+						  enum afs_estate_trace where);
+void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where);
+extern void afs_fileserver_probe_result(struct afs_call *);
+int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+			    struct afs_addr_list *new_alist, struct key *key);
+int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr);
+extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
+extern void afs_fs_probe_dispatcher(struct work_struct *);
+int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
+			      unsigned long exclude, bool is_intr);
+extern void afs_fs_probe_cleanup(struct afs_net *);
 
 /*
  * inode.c
  */
-extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
-				       struct key *);
-extern struct inode *afs_iget(struct super_block *, struct key *,
-			      struct afs_fid *, struct afs_file_status *,
-			      struct afs_callback *);
-extern void afs_zap_data(struct afs_vnode *);
-extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int afs_setattr(struct dentry *, struct iattr *);
+extern const struct afs_operation_ops afs_fetch_status_operation;
+
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+			 struct delayed_call *callback);
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
+extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
+extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
+extern int afs_ilookup5_test_by_fid(struct inode *, void *);
+extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
+extern struct inode *afs_root_iget(struct super_block *, struct key *);
+extern int afs_getattr(struct mnt_idmap *idmap, const struct path *,
+		       struct kstat *, u32, unsigned int);
+extern int afs_setattr(struct mnt_idmap *idmap, struct dentry *, struct iattr *);
 extern void afs_evict_inode(struct inode *);
 extern int afs_drop_inode(struct inode *);
 
@@ -578,12 +1266,75 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct afs_uuid afs_uuid;
+extern int afs_net_id;
+
+static inline struct afs_net *afs_net(struct net *net)
+{
+	return net_generic(net, afs_net_id);
+}
+
+static inline struct afs_net *afs_sb2net(struct super_block *sb)
+{
+	return afs_net(AFS_FS_S(sb)->net_ns);
+}
+
+static inline struct afs_net *afs_d2net(struct dentry *dentry)
+{
+	return afs_sb2net(dentry->d_sb);
+}
+
+static inline struct afs_net *afs_i2net(struct inode *inode)
+{
+	return afs_sb2net(inode->i_sb);
+}
+
+static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
+{
+	return afs_i2net(&vnode->netfs.inode);
+}
+
+static inline struct afs_net *afs_sock2net(struct sock *sk)
+{
+	return net_generic(sock_net(sk), afs_net_id);
+}
+
+static inline void __afs_stat(atomic_t *s)
+{
+	atomic_inc(s);
+}
+
+#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n)
 
 /*
  * misc.c
  */
 extern int afs_abort_to_error(u32);
+extern void afs_prioritise_error(struct afs_error *, int, u32);
+
+static inline void afs_op_nomem(struct afs_operation *op)
+{
+	op->cumul_error.error = -ENOMEM;
+}
+
+static inline int afs_op_error(const struct afs_operation *op)
+{
+	return op->cumul_error.error;
+}
+
+static inline s32 afs_op_abort_code(const struct afs_operation *op)
+{
+	return op->cumul_error.abort_code;
+}
+
+static inline int afs_op_set_error(struct afs_operation *op, int error)
+{
+	return op->cumul_error.error = error;
+}
+
+static inline void afs_op_accumulate_error(struct afs_operation *op, int error, s32 abort_code)
+{
+	afs_prioritise_error(&op->cumul_error, error, abort_code);
+}
 
 /*
  * mntpt.c
@@ -593,165 +1344,486 @@ extern const struct inode_operations afs_autocell_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
 
 extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
 extern void afs_mntpt_kill_timer(void);
 
 /*
  * proc.c
  */
-extern int afs_proc_init(void);
-extern void afs_proc_cleanup(void);
+#ifdef CONFIG_PROC_FS
+extern int __net_init afs_proc_init(struct afs_net *);
+extern void __net_exit afs_proc_cleanup(struct afs_net *);
 extern int afs_proc_cell_setup(struct afs_cell *);
 extern void afs_proc_cell_remove(struct afs_cell *);
+extern void afs_put_sysnames(struct afs_sysnames *);
+#else
+static inline int afs_proc_init(struct afs_net *net) { return 0; }
+static inline void afs_proc_cleanup(struct afs_net *net) {}
+static inline int afs_proc_cell_setup(struct afs_cell *cell) { return 0; }
+static inline void afs_proc_cell_remove(struct afs_cell *cell) {}
+static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {}
+#endif
+
+/*
+ * rotate.c
+ */
+void afs_clear_server_states(struct afs_operation *op);
+extern bool afs_select_fileserver(struct afs_operation *);
+extern void afs_dump_edestaddrreq(const struct afs_operation *);
 
 /*
  * rxrpc.c
  */
-extern int afs_open_socket(void);
-extern void afs_close_socket(void);
-extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
-			 const struct afs_wait_mode *);
-extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
+extern struct workqueue_struct *afs_async_calls;
+
+extern int __net_init afs_open_socket(struct afs_net *);
+extern void __net_exit afs_close_socket(struct afs_net *);
+extern void afs_charge_preallocation(struct work_struct *);
+extern void afs_put_call(struct afs_call *);
+void afs_deferred_put_call(struct afs_call *call);
+void afs_make_call(struct afs_call *call, gfp_t gfp);
+void afs_deliver_to_call(struct afs_call *call);
+void afs_wait_for_call_to_complete(struct afs_call *call);
+extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
+					    const struct afs_call_type *,
 					    size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
-			    size_t);
+extern int afs_extract_data(struct afs_call *, bool);
+extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
+
+static inline struct afs_call *afs_get_call(struct afs_call *call,
+					    enum afs_call_trace why)
+{
+	int r;
+
+	__refcount_inc(&call->ref, &r);
+
+	trace_afs_call(call->debug_id, why, r + 1,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+	return call;
+}
+
+static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why)
+{
+	int r = refcount_read(&call->ref);
+
+	trace_afs_call(call->debug_id, why, r,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+}
+
+static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
+				    gfp_t gfp)
+{
+	struct afs_addr_list *alist = op->estate->addresses;
+
+	op->call	= call;
+	op->type	= call->type;
+	call->op	= op;
+	call->key	= op->key;
+	call->intr	= !(op->flags & AFS_OPERATION_UNINTR);
+	call->peer	= rxrpc_kernel_get_peer(alist->addrs[op->addr_index].peer);
+	call->service_id = op->server->service_id;
+	afs_make_call(call, gfp);
+}
+
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+	call->iov_len = size;
+	call->kvec[0].iov_base = buf;
+	call->kvec[0].iov_len = size;
+	iov_iter_kvec(&call->def_iter, ITER_DEST, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+	call->iov_len = sizeof(call->tmp);
+	afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+	call->iov_len = sizeof(call->tmp64);
+	afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+	call->iov_len = size;
+	iov_iter_discard(&call->def_iter, ITER_DEST, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+	call->iov_len = size;
+	afs_extract_begin(call, call->buffer, size);
+}
+
+static inline int afs_transfer_reply(struct afs_call *call)
+{
+	return afs_extract_data(call, false);
+}
+
+static inline bool afs_check_call_state(struct afs_call *call,
+					enum afs_call_state state)
+{
+	return READ_ONCE(call->state) == state;
+}
+
+static inline bool afs_set_call_state(struct afs_call *call,
+				      enum afs_call_state from,
+				      enum afs_call_state to)
+{
+	bool ok = false;
+
+	spin_lock_bh(&call->state_lock);
+	if (call->state == from) {
+		call->state = to;
+		trace_afs_call_state(call, from, to, 0, 0);
+		ok = true;
+	}
+	spin_unlock_bh(&call->state_lock);
+	return ok;
+}
+
+static inline void afs_set_call_complete(struct afs_call *call,
+					 int error, u32 remote_abort)
+{
+	enum afs_call_state state;
+	bool ok = false;
+
+	spin_lock_bh(&call->state_lock);
+	state = call->state;
+	if (state != AFS_CALL_COMPLETE) {
+		call->abort_code = remote_abort;
+		call->error = error;
+		call->state = AFS_CALL_COMPLETE;
+		trace_afs_call_state(call, state, AFS_CALL_COMPLETE,
+				     error, remote_abort);
+		ok = true;
+	}
+	spin_unlock_bh(&call->state_lock);
+	if (ok) {
+		trace_afs_call_done(call);
+
+		/* Asynchronous calls have two refs to release - one from the alloc and
+		 * one queued with the work item - and we can't just deallocate the
+		 * call because the work item may be queued again.
+		 */
+		if (call->drop_ref)
+			afs_put_call(call);
+	}
+}
 
 /*
  * security.c
  */
+extern void afs_put_permits(struct afs_permits *);
 extern void afs_clear_permits(struct afs_vnode *);
-extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
-extern void afs_zap_permits(struct rcu_head *);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
+			     struct afs_status_cb *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int);
+extern struct key *afs_request_key_rcu(struct afs_cell *);
+extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
+extern int afs_permission(struct mnt_idmap *, struct inode *, int);
+extern void __exit afs_clean_up_permit_cache(void);
 
 /*
  * server.c
  */
 extern spinlock_t afs_server_peer_lock;
 
-#define afs_get_server(S)					\
-do {								\
-	_debug("GET SERVER %d", atomic_read(&(S)->usage));	\
-	atomic_inc(&(S)->usage);				\
-} while(0)
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer);
+extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
+extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+				  enum afs_server_trace reason);
+void afs_unuse_server(struct afs_net *net, struct afs_server *server,
+		      enum afs_server_trace reason);
+void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
+			     enum afs_server_trace reason);
+extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
+void afs_purge_servers(struct afs_cell *cell);
+extern void afs_fs_probe_timer(struct timer_list *);
+void __net_exit afs_wait_for_servers(struct afs_net *net);
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key);
+
+static inline void afs_see_server(struct afs_server *server, enum afs_server_trace trace)
+{
+	int r = refcount_read(&server->ref);
+	int a = atomic_read(&server->active);
+
+	trace_afs_server(server->debug_id, r, a, trace);
 
-extern struct afs_server *afs_lookup_server(struct afs_cell *,
-					    const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct in_addr *);
-extern void afs_put_server(struct afs_server *);
-extern void __exit afs_purge_servers(void);
+}
+
+static inline void afs_inc_servers_outstanding(struct afs_net *net)
+{
+	atomic_inc(&net->servers_outstanding);
+}
+
+static inline void afs_dec_servers_outstanding(struct afs_net *net)
+{
+	if (atomic_dec_and_test(&net->servers_outstanding))
+		wake_up_var(&net->servers_outstanding);
+}
+
+static inline bool afs_is_probing_server(struct afs_server *server)
+{
+	return list_empty(&server->probe_link);
+}
+
+/*
+ * server_list.c
+ */
+static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist)
+{
+	refcount_inc(&slist->usage);
+	return slist;
+}
+
+extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+					      struct key *key,
+					      struct afs_vldb_entry *vldb);
+extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist);
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist,
+				    struct afs_server_list *old);
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist);
 
 /*
  * super.c
  */
-extern int afs_fs_init(void);
+extern int __init afs_fs_init(void);
 extern void afs_fs_exit(void);
 
 /*
- * use-rtnetlink.c
+ * validation.c
  */
-extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
-extern int afs_get_MAC_address(u8 *, size_t);
+bool afs_check_validity(const struct afs_vnode *vnode);
+int afs_update_volume_state(struct afs_operation *op);
+int afs_validate(struct afs_vnode *vnode, struct key *key);
 
 /*
  * vlclient.c
  */
-extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
-				    const char *, struct afs_cache_vlocation *,
-				    const struct afs_wait_mode *);
-extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
-				  afs_volid_t, afs_voltype_t,
-				  struct afs_cache_vlocation *,
-				  const struct afs_wait_mode *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+							 const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+					 struct afs_addr_list *alist,
+					 unsigned int addr_index,
+					 struct key *key,
+					 struct afs_vlserver *server,
+					 unsigned int server_index);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
 
 /*
- * vlocation.c
+ * vl_alias.c
  */
-#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
+extern int afs_cell_detect_alias(struct afs_cell *, struct key *);
 
-extern int __init afs_vlocation_update_init(void);
-extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
-						  struct key *,
-						  const char *, size_t);
-extern void afs_put_vlocation(struct afs_vlocation *);
-extern void afs_vlocation_purge(void);
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
 
 /*
- * vnode.c
+ * vl_rotate.c
  */
-static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+					 struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vlserver_list.c
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
 {
-	return container_of(inode, struct afs_vnode, vfs_inode);
+	refcount_inc(&vlserver->ref);
+	return vlserver;
 }
 
-static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
 {
-	return &vnode->vfs_inode;
-}
-
-extern void afs_vnode_finalise_status_update(struct afs_vnode *,
-					     struct afs_server *);
-extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
-				  struct key *);
-extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
-				off_t, size_t, struct page *);
-extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
-			    umode_t, struct afs_fid *, struct afs_file_status *,
-			    struct afs_callback *, struct afs_server **);
-extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
-			    bool);
-extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
-			  const char *);
-extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
-			     const char *, struct afs_fid *,
-			     struct afs_file_status *, struct afs_server **);
-extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
-			    struct key *, const char *, const char *);
-extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
-				unsigned, unsigned);
-extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
-extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
-				       struct afs_volume_status *);
-extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
-			      afs_lock_type_t);
-extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
-extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
+	if (vllist)
+		refcount_inc(&vllist->ref);
+	return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+							   const void *, size_t);
 
 /*
  * volume.c
  */
-#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
-
-extern void afs_put_volume(struct afs_volume *);
-extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
-extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
-extern int afs_volume_release_fileserver(struct afs_vnode *,
-					 struct afs_server *, int);
+extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
+extern int afs_activate_volume(struct afs_volume *);
+extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
+extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason);
+extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
 
 /*
  * write.c
  */
-extern int afs_set_page_dirty(struct page *);
-extern void afs_put_writeback(struct afs_writeback *);
-extern int afs_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata);
-extern int afs_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata);
-extern int afs_writepage(struct page *, struct writeback_control *);
+void afs_prepare_write(struct netfs_io_subrequest *subreq);
+void afs_issue_write(struct netfs_io_subrequest *subreq);
+void afs_begin_writeback(struct netfs_io_request *wreq);
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
-extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
-			      unsigned long, loff_t);
-extern int afs_writeback_all(struct afs_vnode *);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
+extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
+extern void afs_prune_wb_keys(struct afs_vnode *);
+
+/*
+ * xattr.c
+ */
+extern const struct xattr_handler * const afs_xattr_handlers[];
+
+/*
+ * yfsclient.c
+ */
+extern void yfs_fs_fetch_data(struct afs_operation *);
+extern void yfs_fs_create_file(struct afs_operation *);
+extern void yfs_fs_make_dir(struct afs_operation *);
+extern void yfs_fs_remove_file2(struct afs_operation *);
+extern void yfs_fs_remove_file(struct afs_operation *);
+extern void yfs_fs_remove_dir(struct afs_operation *);
+extern void yfs_fs_link(struct afs_operation *);
+extern void yfs_fs_symlink(struct afs_operation *);
+extern void yfs_fs_rename(struct afs_operation *);
+void yfs_fs_rename_replace(struct afs_operation *op);
+void yfs_fs_rename_noreplace(struct afs_operation *op);
+void yfs_fs_rename_exchange(struct afs_operation *op);
+extern void yfs_fs_store_data(struct afs_operation *);
+extern void yfs_fs_setattr(struct afs_operation *);
+extern void yfs_fs_get_volume_status(struct afs_operation *);
+extern void yfs_fs_set_lock(struct afs_operation *);
+extern void yfs_fs_extend_lock(struct afs_operation *);
+extern void yfs_fs_release_lock(struct afs_operation *);
+extern void yfs_fs_fetch_status(struct afs_operation *);
+extern void yfs_fs_inline_bulk_status(struct afs_operation *);
+
+struct yfs_acl {
+	struct afs_acl	*acl;		/* Dir/file/symlink ACL */
+	struct afs_acl	*vol_acl;	/* Whole volume ACL */
+	u32		inherit_flag;	/* True if ACL is inherited from parent dir */
+	u32		num_cleaned;	/* Number of ACEs removed due to subject removal */
+	unsigned int	flags;
+#define YFS_ACL_WANT_ACL	0x01	/* Set if caller wants ->acl */
+#define YFS_ACL_WANT_VOL_ACL	0x02	/* Set if caller wants ->vol_acl */
+};
+
+extern void yfs_free_opaque_acl(struct yfs_acl *);
+extern void yfs_fs_fetch_opaque_acl(struct afs_operation *);
+extern void yfs_fs_store_opaque_acl2(struct afs_operation *);
+
+/*
+ * Miscellaneous inline functions.
+ */
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+	return container_of(inode, struct afs_vnode, netfs.inode);
+}
+
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+	return &vnode->netfs.inode;
+}
+
+/*
+ * Note that a dentry got changed.  We need to set d_fsdata to the data version
+ * number derived from the result of the operation.  It doesn't matter if
+ * d_fsdata goes backwards as we'll just revalidate.
+ */
+static inline void afs_update_dentry_version(struct afs_operation *op,
+					     struct afs_vnode_param *dir_vp,
+					     struct dentry *dentry)
+{
+	if (!op->cumul_error.error)
+		dentry->d_fsdata =
+			(void *)(unsigned long)dir_vp->scb.status.data_version;
+}
+
+/*
+ * Set the file size and block count.  Estimate the number of 512 bytes blocks
+ * used, rounded up to nearest 1K for consistency with other AFS clients.
+ */
+static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+	i_size_write(&vnode->netfs.inode, size);
+	vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1;
+}
+
+/*
+ * Check for a conflicting operation on a directory that we just unlinked from.
+ * If someone managed to sneak a link or an unlink in on the file we just
+ * unlinked, we won't be able to trust nlink on an AFS file (but not YFS).
+ */
+static inline void afs_check_dir_conflict(struct afs_operation *op,
+					  struct afs_vnode_param *dvp)
+{
+	if (dvp->dv_before + dvp->dv_delta != dvp->scb.status.data_version)
+		op->flags |= AFS_OPERATION_DIR_CONFLICT;
+}
 
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+	trace_afs_io_error(call->debug_id, -EIO, where);
+	return -EIO;
+}
+
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+	trace_afs_file_error(vnode, -EIO, where);
+	return -EIO;
+}
+
+/*
+ * Set the callback promise on a vnode.
+ */
+static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at,
+				      enum afs_cb_promise_trace trace)
+{
+	atomic64_set(&vnode->cb_expires_at, expires_at);
+	trace_afs_cb_promise(vnode, trace);
+}
+
+/*
+ * Clear the callback promise on a vnode, returning true if it was promised.
+ */
+static inline bool afs_clear_cb_promise(struct afs_vnode *vnode,
+					enum afs_cb_promise_trace trace)
+{
+	trace_afs_cb_promise(vnode, trace);
+	return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE;
+}
+
+/*
+ * Mark a directory as being invalid.
+ */
+static inline void afs_invalidate_dir(struct afs_vnode *dvnode,
+				      enum afs_dir_invalid_trace trace)
+{
+	if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		trace_afs_dir_invalid(dvnode, trace);
+		afs_stat_v(dvnode, n_inval);
+	}
+}
 
 /*****************************************************************************/
 /*
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e499ed8..e6bb8237db98 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS client file system
  *
  * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -14,6 +10,9 @@
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/random.h>
+#include <linux/proc_fs.h>
+#define CREATE_TRACE_POINTS
 #include "internal.h"
 
 MODULE_DESCRIPTION("AFS Client File System");
@@ -29,126 +28,185 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
+static struct proc_dir_entry *afs_proc_symlink;
+
+#if defined(CONFIG_ALPHA)
+const char afs_init_sysname[] = "alpha_linux26";
+#elif defined(CONFIG_X86_64)
+const char afs_init_sysname[] = "amd64_linux26";
+#elif defined(CONFIG_ARM)
+const char afs_init_sysname[] = "arm_linux26";
+#elif defined(CONFIG_ARM64)
+const char afs_init_sysname[] = "aarch64_linux26";
+#elif defined(CONFIG_X86_32)
+const char afs_init_sysname[] = "i386_linux26";
+#elif defined(CONFIG_PPC64)
+const char afs_init_sysname[] = "ppc64_linux26";
+#elif defined(CONFIG_PPC32)
+const char afs_init_sysname[] = "ppc_linux26";
+#elif defined(CONFIG_S390)
+#ifdef CONFIG_64BIT
+const char afs_init_sysname[] = "s390x_linux26";
+#else
+const char afs_init_sysname[] = "s390_linux26";
+#endif
+#elif defined(CONFIG_SPARC64)
+const char afs_init_sysname[] = "sparc64_linux26";
+#elif defined(CONFIG_SPARC32)
+const char afs_init_sysname[] = "sparc_linux26";
+#else
+const char afs_init_sysname[] = "unknown_linux26";
+#endif
 
 /*
- * get a client UUID
+ * Initialise an AFS network namespace record.
  */
-static int __init afs_get_client_UUID(void)
+static int __net_init afs_net_init(struct net *net_ns)
 {
-	struct timespec ts;
-	u64 uuidtime;
-	u16 clockseq;
+	struct afs_sysnames *sysnames;
+	struct afs_net *net = afs_net(net_ns);
 	int ret;
 
-	/* read the MAC address of one of the external interfaces and construct
-	 * a UUID from it */
-	ret = afs_get_MAC_address(afs_uuid.node, sizeof(afs_uuid.node));
-	if (ret < 0)
-		return ret;
-
-	getnstimeofday(&ts);
-	uuidtime = (u64) ts.tv_sec * 1000 * 1000 * 10;
-	uuidtime += ts.tv_nsec / 100;
-	uuidtime += AFS_UUID_TO_UNIX_TIME;
-	afs_uuid.time_low = uuidtime;
-	afs_uuid.time_mid = uuidtime >> 32;
-	afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
-	afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME;
-
-	get_random_bytes(&clockseq, 2);
-	afs_uuid.clock_seq_low = clockseq;
-	afs_uuid.clock_seq_hi_and_reserved =
-		(clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
-	afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD;
-
-	_debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-	       afs_uuid.time_low,
-	       afs_uuid.time_mid,
-	       afs_uuid.time_hi_and_version,
-	       afs_uuid.clock_seq_hi_and_reserved,
-	       afs_uuid.clock_seq_low,
-	       afs_uuid.node[0], afs_uuid.node[1], afs_uuid.node[2],
-	       afs_uuid.node[3], afs_uuid.node[4], afs_uuid.node[5]);
+	net->net = net_ns;
+	net->live = true;
+	generate_random_uuid((unsigned char *)&net->uuid);
 
-	return 0;
-}
+	INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+	INIT_WORK(&net->rx_oob_work, afs_process_oob_queue);
+	mutex_init(&net->socket_mutex);
 
-/*
- * initialise the AFS client FS module
- */
-static int __init afs_init(void)
-{
-	int ret;
+	net->cells = RB_ROOT;
+	idr_init(&net->cells_dyn_ino);
+	init_rwsem(&net->cells_lock);
+	mutex_init(&net->cells_alias_lock);
+	mutex_init(&net->proc_cells_lock);
+	INIT_HLIST_HEAD(&net->proc_cells);
 
-	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+	seqlock_init(&net->fs_lock);
+	INIT_LIST_HEAD(&net->fs_probe_fast);
+	INIT_LIST_HEAD(&net->fs_probe_slow);
+	INIT_HLIST_HEAD(&net->fs_proc);
 
-	ret = afs_get_client_UUID();
-	if (ret < 0)
-		return ret;
+	INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
+	timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
+	atomic_set(&net->servers_outstanding, 1);
 
-	/* create workqueue */
 	ret = -ENOMEM;
-	afs_wq = alloc_workqueue("afs", 0, 0);
-	if (!afs_wq)
-		return ret;
-
-	/* register the /proc stuff */
-	ret = afs_proc_init();
+	sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+	if (!sysnames)
+		goto error_sysnames;
+	sysnames->subs[0] = (char *)&afs_init_sysname;
+	sysnames->nr = 1;
+	refcount_set(&sysnames->usage, 1);
+	net->sysnames = sysnames;
+	rwlock_init(&net->sysnames_lock);
+
+	/* Register the /proc stuff */
+	ret = afs_proc_init(net);
 	if (ret < 0)
 		goto error_proc;
 
-#ifdef CONFIG_AFS_FSCACHE
-	/* we want to be able to cache */
-	ret = fscache_register_netfs(&afs_cache_netfs);
-	if (ret < 0)
-		goto error_cache;
-#endif
-
-	/* initialise the cell DB */
-	ret = afs_cell_init(rootcell);
+	/* Initialise the cell DB */
+	ret = afs_cell_init(net, rootcell);
 	if (ret < 0)
 		goto error_cell_init;
 
-	/* initialise the VL update process */
-	ret = afs_vlocation_update_init();
+	/* Create the RxRPC transport */
+	ret = afs_open_socket(net);
 	if (ret < 0)
-		goto error_vl_update_init;
+		goto error_open_socket;
 
-	/* initialise the callback update process */
-	ret = afs_callback_update_init();
-	if (ret < 0)
-		goto error_callback_update_init;
+	return 0;
 
-	/* create the RxRPC transport */
-	ret = afs_open_socket();
+error_open_socket:
+	net->live = false;
+	afs_fs_probe_cleanup(net);
+	afs_cell_purge(net);
+	afs_wait_for_servers(net);
+error_cell_init:
+	net->live = false;
+	afs_proc_cleanup(net);
+error_proc:
+	afs_put_sysnames(net->sysnames);
+error_sysnames:
+	idr_destroy(&net->cells_dyn_ino);
+	net->live = false;
+	return ret;
+}
+
+/*
+ * Clean up and destroy an AFS network namespace record.
+ */
+static void __net_exit afs_net_exit(struct net *net_ns)
+{
+	struct afs_net *net = afs_net(net_ns);
+
+	net->live = false;
+	afs_fs_probe_cleanup(net);
+	afs_cell_purge(net);
+	afs_wait_for_servers(net);
+	afs_close_socket(net);
+	afs_proc_cleanup(net);
+	afs_put_sysnames(net->sysnames);
+	idr_destroy(&net->cells_dyn_ino);
+	kfree_rcu(rcu_access_pointer(net->address_prefs), rcu);
+}
+
+static struct pernet_operations afs_net_ops = {
+	.init	= afs_net_init,
+	.exit	= afs_net_exit,
+	.id	= &afs_net_id,
+	.size	= sizeof(struct afs_net),
+};
+
+/*
+ * initialise the AFS client FS module
+ */
+static int __init afs_init(void)
+{
+	int ret = -ENOMEM;
+
+	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+
+	afs_wq = alloc_workqueue("afs", WQ_PERCPU, 0);
+	if (!afs_wq)
+		goto error_afs_wq;
+	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+	if (!afs_async_calls)
+		goto error_async;
+	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
+	if (!afs_lock_manager)
+		goto error_lockmgr;
+
+	ret = register_pernet_device(&afs_net_ops);
 	if (ret < 0)
-		goto error_open_socket;
+		goto error_net;
 
 	/* register the filesystems */
 	ret = afs_fs_init();
 	if (ret < 0)
 		goto error_fs;
 
+	afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
+	if (!afs_proc_symlink) {
+		ret = -ENOMEM;
+		goto error_proc;
+	}
+
 	return ret;
 
-error_fs:
-	afs_close_socket();
-error_open_socket:
-	afs_callback_update_kill();
-error_callback_update_init:
-	afs_vlocation_purge();
-error_vl_update_init:
-	afs_cell_purge();
-error_cell_init:
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_unregister_netfs(&afs_cache_netfs);
-error_cache:
-#endif
-	afs_proc_cleanup();
 error_proc:
+	afs_fs_exit();
+error_fs:
+	unregister_pernet_device(&afs_net_ops);
+error_net:
+	destroy_workqueue(afs_lock_manager);
+error_lockmgr:
+	destroy_workqueue(afs_async_calls);
+error_async:
 	destroy_workqueue(afs_wq);
+error_afs_wq:
 	rcu_barrier();
 	printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
 	return ret;
@@ -166,18 +224,13 @@ static void __exit afs_exit(void)
 {
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
 
+	proc_remove(afs_proc_symlink);
 	afs_fs_exit();
-	afs_kill_lock_manager();
-	afs_close_socket();
-	afs_purge_servers();
-	afs_callback_update_kill();
-	afs_vlocation_purge();
+	unregister_pernet_device(&afs_net_ops);
+	destroy_workqueue(afs_lock_manager);
+	destroy_workqueue(afs_async_calls);
 	destroy_workqueue(afs_wq);
-	afs_cell_purge();
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_unregister_netfs(&afs_cache_netfs);
-#endif
-	afs_proc_cleanup();
+	afs_clean_up_permit_cache();
 	rcu_barrier();
 }
 
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 0dd4dafee10b..c8a7f266080d 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -1,20 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* miscellaneous bits
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include <rxrpc/packet.h>
+#include <crypto/krb5.h>
 #include "internal.h"
 #include "afs_fs.h"
+#include "protocol_uae.h"
 
 /*
  * convert an AFS abort code to a Linux error number
@@ -22,9 +19,12 @@
 int afs_abort_to_error(u32 abort_code)
 {
 	switch (abort_code) {
+		/* Low errno codes inserted into abort namespace */
 	case 13:		return -EACCES;
 	case 27:		return -EFBIG;
 	case 30:		return -EROFS;
+
+		/* VICE "special error" codes; 101 - 111 */
 	case VSALVAGE:		return -EIO;
 	case VNOVNODE:		return -ENOENT;
 	case VNOVOL:		return -ENOMEDIUM;
@@ -36,26 +36,60 @@ int afs_abort_to_error(u32 abort_code)
 	case VOVERQUOTA:	return -EDQUOT;
 	case VBUSY:		return -EBUSY;
 	case VMOVED:		return -ENXIO;
-	case 0x2f6df0a:		return -EWOULDBLOCK;
-	case 0x2f6df0c:		return -EACCES;
-	case 0x2f6df0f:		return -EBUSY;
-	case 0x2f6df10:		return -EEXIST;
-	case 0x2f6df11:		return -EXDEV;
-	case 0x2f6df13:		return -ENOTDIR;
-	case 0x2f6df14:		return -EISDIR;
-	case 0x2f6df15:		return -EINVAL;
-	case 0x2f6df1a:		return -EFBIG;
-	case 0x2f6df1b:		return -ENOSPC;
-	case 0x2f6df1d:		return -EROFS;
-	case 0x2f6df1e:		return -EMLINK;
-	case 0x2f6df20:		return -EDOM;
-	case 0x2f6df21:		return -ERANGE;
-	case 0x2f6df22:		return -EDEADLK;
-	case 0x2f6df23:		return -ENAMETOOLONG;
-	case 0x2f6df24:		return -ENOLCK;
-	case 0x2f6df26:		return -ENOTEMPTY;
-	case 0x2f6df78:		return -EDQUOT;
 
+		/* Volume Location server errors */
+	case AFSVL_IDEXIST:		return -EEXIST;
+	case AFSVL_IO:			return -EREMOTEIO;
+	case AFSVL_NAMEEXIST:		return -EEXIST;
+	case AFSVL_CREATEFAIL:		return -EREMOTEIO;
+	case AFSVL_NOENT:		return -ENOMEDIUM;
+	case AFSVL_EMPTY:		return -ENOMEDIUM;
+	case AFSVL_ENTDELETED:		return -ENOMEDIUM;
+	case AFSVL_BADNAME:		return -EINVAL;
+	case AFSVL_BADINDEX:		return -EINVAL;
+	case AFSVL_BADVOLTYPE:		return -EINVAL;
+	case AFSVL_BADSERVER:		return -EINVAL;
+	case AFSVL_BADPARTITION:	return -EINVAL;
+	case AFSVL_REPSFULL:		return -EFBIG;
+	case AFSVL_NOREPSERVER:		return -ENOENT;
+	case AFSVL_DUPREPSERVER:	return -EEXIST;
+	case AFSVL_RWNOTFOUND:		return -ENOENT;
+	case AFSVL_BADREFCOUNT:		return -EINVAL;
+	case AFSVL_SIZEEXCEEDED:	return -EINVAL;
+	case AFSVL_BADENTRY:		return -EINVAL;
+	case AFSVL_BADVOLIDBUMP:	return -EINVAL;
+	case AFSVL_IDALREADYHASHED:	return -EINVAL;
+	case AFSVL_ENTRYLOCKED:		return -EBUSY;
+	case AFSVL_BADVOLOPER:		return -EBADRQC;
+	case AFSVL_BADRELLOCKTYPE:	return -EINVAL;
+	case AFSVL_RERELEASE:		return -EREMOTEIO;
+	case AFSVL_BADSERVERFLAG:	return -EINVAL;
+	case AFSVL_PERM:		return -EACCES;
+	case AFSVL_NOMEM:		return -EREMOTEIO;
+
+		/* Unified AFS error table */
+	case UAEPERM:			return -EPERM;
+	case UAENOENT:			return -ENOENT;
+	case UAEAGAIN:			return -EAGAIN;
+	case UAEACCES:			return -EACCES;
+	case UAEBUSY:			return -EBUSY;
+	case UAEEXIST:			return -EEXIST;
+	case UAENOTDIR:			return -ENOTDIR;
+	case UAEISDIR:			return -EISDIR;
+	case UAEFBIG:			return -EFBIG;
+	case UAENOSPC:			return -ENOSPC;
+	case UAEROFS:			return -EROFS;
+	case UAEMLINK:			return -EMLINK;
+	case UAEDEADLK:			return -EDEADLK;
+	case UAENAMETOOLONG:		return -ENAMETOOLONG;
+	case UAENOLCK:			return -ENOLCK;
+	case UAENOTEMPTY:		return -ENOTEMPTY;
+	case UAELOOP:			return -ELOOP;
+	case UAEOVERFLOW:		return -EOVERFLOW;
+	case UAENOMEDIUM:		return -ENOMEDIUM;
+	case UAEDQUOT:			return -EDQUOT;
+
+		/* RXKAD abort codes; from include/rxrpc/packet.h.  ET "RXK" == 0x1260B00 */
 	case RXKADINCONSISTENCY: return -EPROTO;
 	case RXKADPACKETSHORT:	return -EPROTO;
 	case RXKADLEVELFAIL:	return -EKEYREJECTED;
@@ -70,6 +104,105 @@ int afs_abort_to_error(u32 abort_code)
 	case RXKADDATALEN:	return -EKEYREJECTED;
 	case RXKADILLEGALLEVEL:	return -EKEYREJECTED;
 
+	case RXGK_INCONSISTENCY:	return -EPROTO;
+	case RXGK_PACKETSHORT:		return -EPROTO;
+	case RXGK_BADCHALLENGE:		return -EPROTO;
+	case RXGK_SEALEDINCON:		return -EKEYREJECTED;
+	case RXGK_NOTAUTH:		return -EKEYREJECTED;
+	case RXGK_EXPIRED:		return -EKEYEXPIRED;
+	case RXGK_BADLEVEL:		return -EKEYREJECTED;
+	case RXGK_BADKEYNO:		return -EKEYREJECTED;
+	case RXGK_NOTRXGK:		return -EKEYREJECTED;
+	case RXGK_UNSUPPORTED:		return -EKEYREJECTED;
+	case RXGK_GSSERROR:		return -EKEYREJECTED;
+#ifdef RXGK_BADETYPE
+	case RXGK_BADETYPE:		return -ENOPKG;
+#endif
+#ifdef RXGK_BADTOKEN
+	case RXGK_BADTOKEN:		return -EKEYREJECTED;
+#endif
+#ifdef RXGK_BADETYPE
+	case RXGK_DATALEN:		return -EPROTO;
+#endif
+#ifdef RXGK_BADQOP
+	case RXGK_BADQOP:		return -EKEYREJECTED;
+#endif
+
+	case KRB5_PROG_KEYTYPE_NOSUPP:	return -ENOPKG;
+
+	case RXGEN_OPCODE:	return -ENOTSUPP;
+	case RX_INVALID_OPERATION:	return -ENOTSUPP;
+
 	default:		return -EREMOTEIO;
 	}
 }
+
+/*
+ * Select the error to report from a set of errors.
+ */
+void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+{
+	switch (error) {
+	case 0:
+		e->aborted = false;
+		e->error = 0;
+		return;
+	default:
+		if (e->error == -ETIMEDOUT ||
+		    e->error == -ETIME)
+			return;
+		fallthrough;
+	case -ETIMEDOUT:
+	case -ETIME:
+		if (e->error == -ENOMEM ||
+		    e->error == -ENONET)
+			return;
+		fallthrough;
+	case -ENOMEM:
+	case -ENONET:
+		if (e->error == -ERFKILL)
+			return;
+		fallthrough;
+	case -ERFKILL:
+		if (e->error == -EADDRNOTAVAIL)
+			return;
+		fallthrough;
+	case -EADDRNOTAVAIL:
+		if (e->error == -ENETUNREACH)
+			return;
+		fallthrough;
+	case -ENETUNREACH:
+		if (e->error == -EHOSTUNREACH)
+			return;
+		fallthrough;
+	case -EHOSTUNREACH:
+		if (e->error == -EHOSTDOWN)
+			return;
+		fallthrough;
+	case -EHOSTDOWN:
+		if (e->error == -ECONNREFUSED)
+			return;
+		fallthrough;
+	case -ECONNREFUSED:
+		if (e->error == -ECONNRESET)
+			return;
+		fallthrough;
+	case -ECONNRESET: /* Responded, but call expired. */
+		if (e->responded)
+			return;
+		e->error = error;
+		e->aborted = false;
+		return;
+
+	case -ECONNABORTED:
+		e->error = afs_abort_to_error(abort_code);
+		e->aborted = true;
+		e->responded = true;
+		return;
+	case -ENETRESET: /* Responded, but we seem to have changed address */
+		e->aborted = false;
+		e->responded = true;
+		e->error = error;
+		return;
+	}
+}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 9682c33d5daf..57c204a3c04e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* mountpoint management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -17,6 +13,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/gfp.h>
+#include <linux/fs_context.h>
 #include "internal.h"
 
 
@@ -33,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = {
 
 const struct inode_operations afs_mntpt_inode_operations = {
 	.lookup		= afs_mntpt_lookup,
-	.readlink	= page_readlink,
+	.readlink	= afs_readlink,
 	.getattr	= afs_getattr,
 };
 
@@ -46,58 +43,7 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
 
 static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 
-/*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
-	struct page *page;
-	size_t size;
-	char *buf;
-	int ret;
-
-	_enter("{%x:%u,%u}",
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-	/* read the contents of the symlink into the pagecache */
-	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-			       afs_page_filler, key);
-	if (IS_ERR(page)) {
-		ret = PTR_ERR(page);
-		goto out;
-	}
-
-	ret = -EIO;
-	if (PageError(page))
-		goto out_free;
-
-	buf = kmap(page);
-
-	/* examine the symlink's contents */
-	size = vnode->status.size;
-	_debug("symlink to %*.*s", (int) size, (int) size, buf);
-
-	if (size > 2 &&
-	    (buf[0] == '%' || buf[0] == '#') &&
-	    buf[size - 1] == '.'
-	    ) {
-		_debug("symlink is a mountpoint");
-		spin_lock(&vnode->lock);
-		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-		vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
-		spin_unlock(&vnode->lock);
-	}
-
-	ret = 0;
-
-	kunmap(page);
-out_free:
-	page_cache_release(page);
-out:
-	_leave(" = %d", ret);
-	return ret;
-}
+static const char afs_root_volume[] = "root.cell";
 
 /*
  * no valid lookup procedure on this sort of dir
@@ -106,14 +52,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
 				       unsigned int flags)
 {
-	_enter("%p,%p{%p{%s},%s}",
-	       dir,
-	       dentry,
-	       dentry->d_parent,
-	       dentry->d_parent ?
-	       dentry->d_parent->d_name.name : (const unsigned char *) "",
-	       dentry->d_name.name);
-
+	_enter("%p,%p{%pd2}", dir, dentry, dentry);
 	return ERR_PTR(-EREMOTE);
 }
 
@@ -122,117 +61,121 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
  */
 static int afs_mntpt_open(struct inode *inode, struct file *file)
 {
-	_enter("%p,%p{%p{%s},%s}",
-	       inode, file,
-	       file->f_path.dentry->d_parent,
-	       file->f_path.dentry->d_parent ?
-	       file->f_path.dentry->d_parent->d_name.name :
-	       (const unsigned char *) "",
-	       file->f_path.dentry->d_name.name);
-
+	_enter("%p,%p{%pD2}", inode, file, file);
 	return -EREMOTE;
 }
 
 /*
- * create a vfsmount to be automounted
+ * Set the parameters for the proposed superblock.
  */
-static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
+static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
 {
-	struct afs_super_info *super;
-	struct vfsmount *mnt;
-	struct afs_vnode *vnode;
-	struct page *page;
-	char *devname, *options;
-	bool rwpath = false;
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb);
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt));
+	struct afs_cell *cell;
+	const char *p;
 	int ret;
 
-	_enter("{%s}", mntpt->d_name.name);
-
-	BUG_ON(!mntpt->d_inode);
-
-	ret = -ENOMEM;
-	devname = (char *) get_zeroed_page(GFP_KERNEL);
-	if (!devname)
-		goto error_no_devname;
-
-	options = (char *) get_zeroed_page(GFP_KERNEL);
-	if (!options)
-		goto error_no_options;
+	if (fc->net_ns != src_as->net_ns) {
+		put_net(fc->net_ns);
+		fc->net_ns = get_net(src_as->net_ns);
+	}
 
-	vnode = AFS_FS_I(mntpt->d_inode);
+	if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) {
+		ctx->type = AFSVL_RWVOL;
+		ctx->force = true;
+	}
+	if (ctx->cell) {
+		afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_mntpt);
+		ctx->cell = NULL;
+	}
 	if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
 		/* if the directory is a pseudo directory, use the d_name */
-		static const char afs_root_cell[] = ":root.cell.";
 		unsigned size = mntpt->d_name.len;
 
-		ret = -ENOENT;
-		if (size < 2 || size > AFS_MAXCELLNAME)
-			goto error_no_page;
+		if (size < 2)
+			return -ENOENT;
 
+		p = mntpt->d_name.name;
 		if (mntpt->d_name.name[0] == '.') {
-			devname[0] = '#';
-			memcpy(devname + 1, mntpt->d_name.name, size - 1);
-			memcpy(devname + size, afs_root_cell,
-			       sizeof(afs_root_cell));
-			rwpath = true;
-		} else {
-			devname[0] = '%';
-			memcpy(devname + 1, mntpt->d_name.name, size);
-			memcpy(devname + size + 1, afs_root_cell,
-			       sizeof(afs_root_cell));
+			size--;
+			p++;
+			ctx->type = AFSVL_RWVOL;
+			ctx->force = true;
+		}
+		if (size > AFS_MAXCELLNAME)
+			return -ENAMETOOLONG;
+
+		cell = afs_lookup_cell(ctx->net, p, size, NULL,
+				       AFS_LOOKUP_CELL_MOUNTPOINT,
+				       afs_cell_trace_use_lookup_mntpt);
+		if (IS_ERR(cell)) {
+			pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
+			return PTR_ERR(cell);
 		}
+		ctx->cell = cell;
+
+		ctx->volname = afs_root_volume;
+		ctx->volnamesz = sizeof(afs_root_volume) - 1;
 	} else {
 		/* read the contents of the AFS special symlink */
-		loff_t size = i_size_read(mntpt->d_inode);
-		char *buf;
+		DEFINE_DELAYED_CALL(cleanup);
+		const char *content;
+		loff_t size = i_size_read(d_inode(mntpt));
 
-		ret = -EINVAL;
-		if (size > PAGE_SIZE - 1)
-			goto error_no_page;
+		if (src_as->cell)
+			ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
 
-		page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
-		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
-			goto error_no_page;
-		}
+		if (size < 2 || size > PAGE_SIZE - 1)
+			return -EINVAL;
 
-		ret = -EIO;
-		if (PageError(page))
-			goto error;
+		content = afs_get_link(mntpt, d_inode(mntpt), &cleanup);
+		if (IS_ERR(content)) {
+			do_delayed_call(&cleanup);
+			return PTR_ERR(content);
+		}
 
-		buf = kmap_atomic(page);
-		memcpy(devname, buf, size);
-		kunmap_atomic(buf);
-		page_cache_release(page);
-		page = NULL;
+		ret = -EINVAL;
+		if (content[size - 1] == '.')
+			ret = vfs_parse_fs_qstr(fc, "source",
+						&QSTR_LEN(content, size - 1));
+		do_delayed_call(&cleanup);
+		if (ret < 0)
+			return ret;
+
+		/* Don't cross a backup volume mountpoint from a backup volume */
+		if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+		    ctx->type == AFSVL_BACKVOL)
+			return -ENODEV;
 	}
 
-	/* work out what options we want */
-	super = AFS_FS_S(mntpt->d_sb);
-	memcpy(options, "cell=", 5);
-	strcpy(options + 5, super->volume->cell->name);
-	if (super->volume->type == AFSVL_RWVOL || rwpath)
-		strcat(options, ",rwpath");
-
-	/* try and do the mount */
-	_debug("--- attempting mount %s -o %s ---", devname, options);
-	mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
-	_debug("--- mount result %p ---", mnt);
-
-	free_page((unsigned long) devname);
-	free_page((unsigned long) options);
-	_leave(" = %p", mnt);
-	return mnt;
+	return 0;
+}
+
+/*
+ * create a vfsmount to be automounted
+ */
+static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
+{
+	struct fs_context *fc;
+	struct vfsmount *mnt;
+	int ret;
+
+	BUG_ON(!d_inode(mntpt));
+
+	fc = fs_context_for_submount(&afs_fs_type, mntpt);
+	if (IS_ERR(fc))
+		return ERR_CAST(fc);
 
-error:
-	page_cache_release(page);
-error_no_page:
-	free_page((unsigned long) options);
-error_no_options:
-	free_page((unsigned long) devname);
-error_no_devname:
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	ret = afs_mntpt_set_params(fc, mntpt);
+	if (!ret)
+		mnt = fc_mount(fc);
+	else
+		mnt = ERR_PTR(ret);
+
+	put_fs_context(fc);
+	return mnt;
 }
 
 /*
@@ -242,13 +185,12 @@ struct vfsmount *afs_d_automount(struct path *path)
 {
 	struct vfsmount *newmnt;
 
-	_enter("{%s}", path->dentry->d_name.name);
+	_enter("{%pd}", path->dentry);
 
 	newmnt = afs_mntpt_do_automount(path->dentry);
 	if (IS_ERR(newmnt))
 		return newmnt;
 
-	mntget(newmnt); /* prevent immediate expiration */
 	mnt_set_expiry(newmnt, &afs_vfsmounts);
 	queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
 			   afs_mntpt_expiry_timeout * HZ);
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
deleted file mode 100644
index 7ad36506c256..000000000000
--- a/fs/afs/netdevices.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/* AFS network device helpers
- *
- * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
- */
-
-#include <linux/string.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <net/net_namespace.h>
-#include "internal.h"
-
-/*
- * get a MAC address from a random ethernet interface that has a real one
- * - the buffer will normally be 6 bytes in size
- */
-int afs_get_MAC_address(u8 *mac, size_t maclen)
-{
-	struct net_device *dev;
-	int ret = -ENODEV;
-
-	BUG_ON(maclen != ETH_ALEN);
-
-	rtnl_lock();
-	dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
-	if (dev) {
-		memcpy(mac, dev->dev_addr, maclen);
-		ret = 0;
-	}
-	rtnl_unlock();
-	return ret;
-}
-
-/*
- * get a list of this system's interface IPv4 addresses, netmasks and MTUs
- * - maxbufs must be at least 1
- * - returns the number of interface records in the buffer
- */
-int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
-			    bool wantloopback)
-{
-	struct net_device *dev;
-	struct in_device *idev;
-	int n = 0;
-
-	ASSERT(maxbufs > 0);
-
-	rtnl_lock();
-	for_each_netdev(&init_net, dev) {
-		if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
-			continue;
-		idev = __in_dev_get_rtnl(dev);
-		if (!idev)
-			continue;
-		for_primary_ifa(idev) {
-			bufs[n].address.s_addr = ifa->ifa_address;
-			bufs[n].netmask.s_addr = ifa->ifa_mask;
-			bufs[n].mtu = dev->mtu;
-			n++;
-			if (n >= maxbufs)
-				goto out;
-		} endfor_ifa(idev);
-	}
-out:
-	rtnl_unlock();
-	return n;
-}
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 526e4bbbde59..44520549b509 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* /proc interface for AFS
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/slab.h>
@@ -14,263 +10,95 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "internal.h"
 
-static struct proc_dir_entry *proc_afs;
-
-
-static int afs_proc_cells_open(struct inode *inode, struct file *file);
-static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos);
-static void afs_proc_cells_stop(struct seq_file *p, void *v);
-static int afs_proc_cells_show(struct seq_file *m, void *v);
-static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
-				    size_t size, loff_t *_pos);
-
-static const struct seq_operations afs_proc_cells_ops = {
-	.start	= afs_proc_cells_start,
-	.next	= afs_proc_cells_next,
-	.stop	= afs_proc_cells_stop,
-	.show	= afs_proc_cells_show,
-};
-
-static const struct file_operations afs_proc_cells_fops = {
-	.open		= afs_proc_cells_open,
-	.read		= seq_read,
-	.write		= afs_proc_cells_write,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-	.owner		= THIS_MODULE,
-};
-
-static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
-static int afs_proc_rootcell_release(struct inode *inode, struct file *file);
-static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
-				      size_t size, loff_t *_pos);
-static ssize_t afs_proc_rootcell_write(struct file *file,
-				       const char __user *buf,
-				       size_t size, loff_t *_pos);
-
-static const struct file_operations afs_proc_rootcell_fops = {
-	.open		= afs_proc_rootcell_open,
-	.read		= afs_proc_rootcell_read,
-	.write		= afs_proc_rootcell_write,
-	.llseek		= no_llseek,
-	.release	= afs_proc_rootcell_release,
-	.owner		= THIS_MODULE,
-};
-
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
-static int afs_proc_cell_volumes_release(struct inode *inode,
-					 struct file *file);
-static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
-					loff_t *pos);
-static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v);
-static int afs_proc_cell_volumes_show(struct seq_file *m, void *v);
-
-static const struct seq_operations afs_proc_cell_volumes_ops = {
-	.start	= afs_proc_cell_volumes_start,
-	.next	= afs_proc_cell_volumes_next,
-	.stop	= afs_proc_cell_volumes_stop,
-	.show	= afs_proc_cell_volumes_show,
-};
-
-static const struct file_operations afs_proc_cell_volumes_fops = {
-	.open		= afs_proc_cell_volumes_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= afs_proc_cell_volumes_release,
-	.owner		= THIS_MODULE,
-};
-
-static int afs_proc_cell_vlservers_open(struct inode *inode,
-					struct file *file);
-static int afs_proc_cell_vlservers_release(struct inode *inode,
-					   struct file *file);
-static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
-					  loff_t *pos);
-static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v);
-static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v);
-
-static const struct seq_operations afs_proc_cell_vlservers_ops = {
-	.start	= afs_proc_cell_vlservers_start,
-	.next	= afs_proc_cell_vlservers_next,
-	.stop	= afs_proc_cell_vlservers_stop,
-	.show	= afs_proc_cell_vlservers_show,
-};
-
-static const struct file_operations afs_proc_cell_vlservers_fops = {
-	.open		= afs_proc_cell_vlservers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= afs_proc_cell_vlservers_release,
-	.owner		= THIS_MODULE,
-};
-
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
-static int afs_proc_cell_servers_release(struct inode *inode,
-					 struct file *file);
-static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
-					loff_t *pos);
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
-
-static const struct seq_operations afs_proc_cell_servers_ops = {
-	.start	= afs_proc_cell_servers_start,
-	.next	= afs_proc_cell_servers_next,
-	.stop	= afs_proc_cell_servers_stop,
-	.show	= afs_proc_cell_servers_show,
-};
-
-static const struct file_operations afs_proc_cell_servers_fops = {
-	.open		= afs_proc_cell_servers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= afs_proc_cell_servers_release,
-	.owner		= THIS_MODULE,
+struct afs_vl_seq_net_private {
+	struct seq_net_private		seq;	/* Must be first */
+	struct afs_vlserver_list	*vllist;
 };
 
-/*
- * initialise the /proc/fs/afs/ directory
- */
-int afs_proc_init(void)
+static inline struct afs_net *afs_seq2net(struct seq_file *m)
 {
-	struct proc_dir_entry *p;
-
-	_enter("");
-
-	proc_afs = proc_mkdir("fs/afs", NULL);
-	if (!proc_afs)
-		goto error_dir;
-
-	p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
-	if (!p)
-		goto error_cells;
-
-	p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
-	if (!p)
-		goto error_rootcell;
-
-	_leave(" = 0");
-	return 0;
-
-error_rootcell:
- 	remove_proc_entry("cells", proc_afs);
-error_cells:
-	remove_proc_entry("fs/afs", NULL);
-error_dir:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
+	return afs_net(seq_file_net(m));
 }
 
-/*
- * clean up the /proc/fs/afs/ directory
- */
-void afs_proc_cleanup(void)
+static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
 {
-	remove_proc_entry("rootcell", proc_afs);
-	remove_proc_entry("cells", proc_afs);
-	remove_proc_entry("fs/afs", NULL);
+	return afs_net(seq_file_single_net(m));
 }
 
 /*
- * open "/proc/fs/afs/cells" which provides a summary of extant cells
+ * Display the list of cells known to the namespace.
  */
-static int afs_proc_cells_open(struct inode *inode, struct file *file)
+static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
-	struct seq_file *m;
-	int ret;
+	struct afs_vlserver_list *vllist;
+	struct afs_cell *cell;
 
-	ret = seq_open(file, &afs_proc_cells_ops);
-	if (ret < 0)
-		return ret;
+	if (v == SEQ_START_TOKEN) {
+		/* display header on line 1 */
+		seq_puts(m, "USE ACT    TTL SV ST NAME\n");
+		return 0;
+	}
 
-	m = file->private_data;
-	m->private = PDE_DATA(inode);
+	cell = list_entry(v, struct afs_cell, proc_link);
+	vllist = rcu_dereference(cell->vl_servers);
 
+	/* display one cell per line on subsequent lines */
+	seq_printf(m, "%3u %3u %6lld %2u %2u %s\n",
+		   refcount_read(&cell->ref),
+		   atomic_read(&cell->active),
+		   cell->dns_expiry - ktime_get_real_seconds(),
+		   vllist ? vllist->nr_servers : 0,
+		   cell->state,
+		   cell->name);
 	return 0;
 }
 
-/*
- * set up the iterator to start reading from the cells list and return the
- * first item
- */
 static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
-	/* lock the list against modification */
-	down_read(&afs_proc_cells_sem);
-	return seq_list_start_head(&afs_proc_cells, *_pos);
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos);
 }
 
-/*
- * move to next cell in cells list
- */
-static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
+static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	return seq_list_next(v, &afs_proc_cells, pos);
+	return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos);
 }
 
-/*
- * clean up after reading from the cells list
- */
-static void afs_proc_cells_stop(struct seq_file *p, void *v)
+static void afs_proc_cells_stop(struct seq_file *m, void *v)
+	__releases(rcu)
 {
-	up_read(&afs_proc_cells_sem);
+	rcu_read_unlock();
 }
 
-/*
- * display a header line followed by a load of cell lines
- */
-static int afs_proc_cells_show(struct seq_file *m, void *v)
-{
-	struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
-
-	if (v == &afs_proc_cells) {
-		/* display header on line 1 */
-		seq_puts(m, "USE NAME\n");
-		return 0;
-	}
-
-	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %s\n",
-		   atomic_read(&cell->usage), cell->name);
-	return 0;
-}
+static const struct seq_operations afs_proc_cells_ops = {
+	.start	= afs_proc_cells_start,
+	.next	= afs_proc_cells_next,
+	.stop	= afs_proc_cells_stop,
+	.show	= afs_proc_cells_show,
+};
 
 /*
  * handle writes to /proc/fs/afs/cells
  * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]"
  */
-static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
-				    size_t size, loff_t *_pos)
+static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
 {
-	char *kbuf, *name, *args;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net(m);
+	char *name, *args;
 	int ret;
 
-	/* start by dragging the command into memory */
-	if (size <= 1 || size >= PAGE_SIZE)
-		return -EINVAL;
-
-	kbuf = kmalloc(size + 1, GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
-
-	ret = -EFAULT;
-	if (copy_from_user(kbuf, buf, size) != 0)
-		goto done;
-	kbuf[size] = 0;
-
 	/* trim to first NL */
-	name = memchr(kbuf, '\n', size);
+	name = memchr(buf, '\n', size);
 	if (name)
 		*name = 0;
 
 	/* split into command, name and argslist */
-	name = strchr(kbuf, ' ');
+	name = strchr(buf, ' ');
 	if (!name)
 		goto inval;
 	do {
@@ -280,36 +108,37 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 		goto inval;
 
 	args = strchr(name, ' ');
-	if (!args)
-		goto inval;
-	do {
-		*args++ = 0;
-	} while(*args == ' ');
-	if (!*args)
-		goto inval;
+	if (args) {
+		do {
+			*args++ = 0;
+		} while(*args == ' ');
+		if (!*args)
+			goto inval;
+	}
 
 	/* determine command to perform */
-	_debug("cmd=%s name=%s args=%s", kbuf, name, args);
+	_debug("cmd=%s name=%s args=%s", buf, name, args);
 
-	if (strcmp(kbuf, "add") == 0) {
+	if (strcmp(buf, "add") == 0) {
 		struct afs_cell *cell;
 
-		cell = afs_cell_create(name, strlen(name), args, false);
+		cell = afs_lookup_cell(net, name, strlen(name), args,
+				       AFS_LOOKUP_CELL_PRELOAD,
+				       afs_cell_trace_use_lookup_add);
 		if (IS_ERR(cell)) {
 			ret = PTR_ERR(cell);
 			goto done;
 		}
 
-		afs_put_cell(cell);
-		printk("kAFS: Added new cell '%s'\n", name);
+		if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+			afs_unuse_cell(cell, afs_cell_trace_unuse_no_pin);
 	} else {
 		goto inval;
 	}
 
-	ret = size;
+	ret = 0;
 
 done:
-	kfree(kbuf);
 	_leave(" = %d", ret);
 	return ret;
 
@@ -320,425 +149,638 @@ inval:
 }
 
 /*
- * Stubs for /proc/fs/afs/rootcell
+ * Display the list of addr_prefs known to the namespace.
  */
-static int afs_proc_rootcell_open(struct inode *inode, struct file *file)
-{
-	return 0;
-}
+static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
+{
+	struct afs_addr_preference_list *preflist;
+	struct afs_addr_preference *pref;
+	struct afs_net *net = afs_seq2net_single(m);
+	union {
+		struct sockaddr_in sin;
+		struct sockaddr_in6 sin6;
+	} addr;
+	unsigned int i;
+	char buf[44]; /* Maximum ipv6 + max subnet is 43 */
+
+	rcu_read_lock();
+	preflist = rcu_dereference(net->address_prefs);
+
+	if (!preflist) {
+		seq_puts(m, "NO PREFS\n");
+		goto out;
+	}
 
-static int afs_proc_rootcell_release(struct inode *inode, struct file *file)
-{
+	seq_printf(m, "PROT SUBNET                                      PRIOR (v=%u n=%u/%u/%u)\n",
+		   preflist->version, preflist->ipv6_off, preflist->nr, preflist->max_prefs);
+
+	memset(&addr, 0, sizeof(addr));
+
+	for (i = 0; i < preflist->nr; i++) {
+		pref = &preflist->prefs[i];
+
+		addr.sin.sin_family = pref->family;
+		if (pref->family == AF_INET) {
+			memcpy(&addr.sin.sin_addr, &pref->ipv4_addr,
+			       sizeof(addr.sin.sin_addr));
+			snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin, pref->subnet_mask);
+			seq_printf(m, "UDP  %-43.43s %5u\n", buf, pref->prio);
+		} else {
+			memcpy(&addr.sin6.sin6_addr, &pref->ipv6_addr,
+			       sizeof(addr.sin6.sin6_addr));
+			snprintf(buf, sizeof(buf), "%pISc/%u", &addr.sin6, pref->subnet_mask);
+			seq_printf(m, "UDP  %-43.43s %5u\n", buf, pref->prio);
+		}
+	}
+
+out:
+	rcu_read_unlock();
 	return 0;
 }
 
-static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
-				      size_t size, loff_t *_pos)
+/*
+ * Display the name of the current workstation cell.
+ */
+static int afs_proc_rootcell_show(struct seq_file *m, void *v)
 {
+	struct afs_cell *cell;
+	struct afs_net *net;
+
+	net = afs_seq2net_single(m);
+	down_read(&net->cells_lock);
+	cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
+	if (cell)
+		seq_printf(m, "%s\n", cell->name);
+	up_read(&net->cells_lock);
 	return 0;
 }
 
 /*
- * handle writes to /proc/fs/afs/rootcell
- * - to initialize rootcell: echo "cell.name:192.168.231.14"
+ * Set the current workstation cell and optionally supply its list of volume
+ * location servers.
+ *
+ *	echo "cell.name:192.168.231.14" >/proc/fs/afs/rootcell
  */
-static ssize_t afs_proc_rootcell_write(struct file *file,
-				       const char __user *buf,
-				       size_t size, loff_t *_pos)
+static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
 {
-	char *kbuf, *s;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net_single(m);
+	char *s;
 	int ret;
 
-	/* start by dragging the command into memory */
-	if (size <= 1 || size >= PAGE_SIZE)
-		return -EINVAL;
-
-	ret = -ENOMEM;
-	kbuf = kmalloc(size + 1, GFP_KERNEL);
-	if (!kbuf)
-		goto nomem;
-
-	ret = -EFAULT;
-	if (copy_from_user(kbuf, buf, size) != 0)
-		goto infault;
-	kbuf[size] = 0;
+	ret = -EINVAL;
+	if (buf[0] == '.')
+		goto out;
+	if (memchr(buf, '/', size))
+		goto out;
 
 	/* trim to first NL */
-	s = memchr(kbuf, '\n', size);
+	s = memchr(buf, '\n', size);
 	if (s)
 		*s = 0;
 
 	/* determine command to perform */
-	_debug("rootcell=%s", kbuf);
+	_debug("rootcell=%s", buf);
 
-	ret = afs_cell_init(kbuf);
-	if (ret >= 0)
-		ret = size;	/* consume everything, always */
+	ret = -EEXIST;
+	inode_lock(file_inode(file));
+	if (!rcu_access_pointer(net->ws_cell))
+		ret = afs_cell_init(net, buf);
+	else
+		printk("busy\n");
+	inode_unlock(file_inode(file));
 
-infault:
-	kfree(kbuf);
-nomem:
+out:
 	_leave(" = %d", ret);
 	return ret;
 }
 
+static const char afs_vol_types[3][3] = {
+	[AFSVL_RWVOL]	= "RW",
+	[AFSVL_ROVOL]	= "RO",
+	[AFSVL_BACKVOL]	= "BK",
+};
+
 /*
- * initialise /proc/fs/afs/<cell>/
+ * Display the list of volumes known to a cell.
  */
-int afs_proc_cell_setup(struct afs_cell *cell)
+static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 {
-	struct proc_dir_entry *p;
-
-	_enter("%p{%s}", cell, cell->name);
-
-	cell->proc_dir = proc_mkdir(cell->name, proc_afs);
-	if (!cell->proc_dir)
-		goto error_dir;
+	struct afs_volume *vol = hlist_entry(v, struct afs_volume, proc_link);
 
-	p = proc_create_data("servers", 0, cell->proc_dir,
-			     &afs_proc_cell_servers_fops, cell);
-	if (!p)
-		goto error_servers;
-
-	p = proc_create_data("vlservers", 0, cell->proc_dir,
-			     &afs_proc_cell_vlservers_fops, cell);
-	if (!p)
-		goto error_vlservers;
+	/* Display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "USE VID      TY NAME\n");
+		return 0;
+	}
 
-	p = proc_create_data("volumes", 0, cell->proc_dir,
-			     &afs_proc_cell_volumes_fops, cell);
-	if (!p)
-		goto error_volumes;
+	seq_printf(m, "%3d %08llx %s %s\n",
+		   refcount_read(&vol->ref), vol->vid,
+		   afs_vol_types[vol->type],
+		   vol->name);
 
-	_leave(" = 0");
 	return 0;
-
-error_volumes:
-	remove_proc_entry("vlservers", cell->proc_dir);
-error_vlservers:
-	remove_proc_entry("servers", cell->proc_dir);
-error_servers:
-	remove_proc_entry(cell->name, proc_afs);
-error_dir:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
 }
 
-/*
- * remove /proc/fs/afs/<cell>/
- */
-void afs_proc_cell_remove(struct afs_cell *cell)
+static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+	__acquires(cell->proc_lock)
 {
-	_enter("");
+	struct afs_cell *cell = pde_data(file_inode(m->file));
 
-	remove_proc_entry("volumes", cell->proc_dir);
-	remove_proc_entry("vlservers", cell->proc_dir);
-	remove_proc_entry("servers", cell->proc_dir);
-	remove_proc_entry(cell->name, proc_afs);
-
-	_leave("");
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos);
 }
 
-/*
- * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
- */
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
+static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v,
+					loff_t *_pos)
 {
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
+	struct afs_cell *cell = pde_data(file_inode(m->file));
 
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
+	return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos);
+}
 
-	ret = seq_open(file, &afs_proc_cell_volumes_ops);
-	if (ret < 0)
-		return ret;
+static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v)
+	__releases(cell->proc_lock)
+{
+	rcu_read_unlock();
+}
+
+static const struct seq_operations afs_proc_cell_volumes_ops = {
+	.start	= afs_proc_cell_volumes_start,
+	.next	= afs_proc_cell_volumes_next,
+	.stop	= afs_proc_cell_volumes_stop,
+	.show	= afs_proc_cell_volumes_show,
+};
 
-	m = file->private_data;
-	m->private = cell;
+static const char *const dns_record_sources[NR__dns_record_source + 1] = {
+	[DNS_RECORD_UNAVAILABLE]	= "unav",
+	[DNS_RECORD_FROM_CONFIG]	= "cfg",
+	[DNS_RECORD_FROM_DNS_A]		= "A",
+	[DNS_RECORD_FROM_DNS_AFSDB]	= "AFSDB",
+	[DNS_RECORD_FROM_DNS_SRV]	= "SRV",
+	[DNS_RECORD_FROM_NSS]		= "nss",
+	[NR__dns_record_source]		= "[weird]"
+};
 
-	return 0;
-}
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
+	[DNS_LOOKUP_NOT_DONE]		= "no-lookup",
+	[DNS_LOOKUP_GOOD]		= "good",
+	[DNS_LOOKUP_GOOD_WITH_BAD]	= "good/bad",
+	[DNS_LOOKUP_BAD]		= "bad",
+	[DNS_LOOKUP_GOT_NOT_FOUND]	= "not-found",
+	[DNS_LOOKUP_GOT_LOCAL_FAILURE]	= "local-failure",
+	[DNS_LOOKUP_GOT_TEMP_FAILURE]	= "temp-failure",
+	[DNS_LOOKUP_GOT_NS_FAILURE]	= "ns-failure",
+	[NR__dns_lookup_status]		= "[weird]"
+};
 
 /*
- * close the file and release the ref to the cell
+ * Display the list of Volume Location servers we're using for a cell.
  */
-static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
+static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 {
-	return seq_release(inode, file);
+	const struct afs_vl_seq_net_private *priv = m->private;
+	const struct afs_vlserver_list *vllist = priv->vllist;
+	const struct afs_vlserver_entry *entry;
+	const struct afs_vlserver *vlserver;
+	const struct afs_addr_list *alist;
+	int i;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(m, "# source %s, status %s\n",
+			   dns_record_sources[vllist ? vllist->source : 0],
+			   dns_lookup_statuses[vllist ? vllist->status : 0]);
+		return 0;
+	}
+
+	entry = v;
+	vlserver = entry->server;
+	alist = rcu_dereference(vlserver->addresses);
+
+	seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+		   vlserver->name, entry->priority, entry->weight,
+		   dns_record_sources[alist ? alist->source : entry->source],
+		   dns_lookup_statuses[alist ? alist->status : entry->status]);
+	if (alist) {
+		for (i = 0; i < alist->nr_addrs; i++)
+			seq_printf(m, " %c %pISpc\n",
+				   alist->preferred == i ? '>' : '-',
+				   rxrpc_kernel_remote_addr(alist->addrs[i].peer));
+	}
+	seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt);
+	seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n",
+		   vlserver->probe.flags, vlserver->probe.error,
+		   vlserver->probe.abort_code,
+		   atomic_read(&vlserver->probe_outstanding));
+	return 0;
 }
 
-/*
- * set up the iterator to start reading from the cells list and return the
- * first item
- */
-static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
-	struct afs_cell *cell = m->private;
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist;
+	struct afs_cell *cell = pde_data(file_inode(m->file));
+	loff_t pos = *_pos;
+
+	rcu_read_lock();
+
+	vllist = rcu_dereference(cell->vl_servers);
+	priv->vllist = vllist;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	if (pos < 0)
+		*_pos = pos = 0;
+	if (pos == 0)
+		return SEQ_START_TOKEN;
+
+	if (pos - 1 >= vllist->nr_servers)
+		return NULL;
 
-	/* lock the list against modification */
-	down_read(&cell->vl_sem);
-	return seq_list_start_head(&cell->vl_list, *_pos);
+	return &vllist->servers[pos - 1];
 }
 
-/*
- * move to next cell in cells list
- */
-static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
-					loff_t *_pos)
+static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
+					  loff_t *_pos)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist = priv->vllist;
+	loff_t pos;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
-	return seq_list_next(v, &cell->vl_list, _pos);
+	pos = *_pos;
+	pos++;
+	*_pos = pos;
+	if (!vllist || pos - 1 >= vllist->nr_servers)
+		return NULL;
+
+	return &vllist->servers[pos - 1];
 }
 
-/*
- * clean up after reading from the cells list
- */
-static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
+static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
+	__releases(rcu)
 {
-	struct afs_cell *cell = p->private;
-
-	up_read(&cell->vl_sem);
+	rcu_read_unlock();
 }
 
-static const char afs_vlocation_states[][4] = {
-	[AFS_VL_NEW]			= "New",
-	[AFS_VL_CREATING]		= "Crt",
-	[AFS_VL_VALID]			= "Val",
-	[AFS_VL_NO_VOLUME]		= "NoV",
-	[AFS_VL_UPDATING]		= "Upd",
-	[AFS_VL_VOLUME_DELETED]		= "Del",
-	[AFS_VL_UNCERTAIN]		= "Unc",
+static const struct seq_operations afs_proc_cell_vlservers_ops = {
+	.start	= afs_proc_cell_vlservers_start,
+	.next	= afs_proc_cell_vlservers_next,
+	.stop	= afs_proc_cell_vlservers_stop,
+	.show	= afs_proc_cell_vlservers_show,
 };
 
 /*
- * display a header line followed by a load of volume lines
+ * Display the list of fileservers we're using within a namespace.
  */
-static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
+static int afs_proc_servers_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell = m->private;
-	struct afs_vlocation *vlocation =
-		list_entry(v, struct afs_vlocation, link);
+	struct afs_endpoint_state *estate;
+	struct afs_addr_list *alist;
+	struct afs_server *server;
+	unsigned long failed;
+	int i;
 
-	/* display header on line 1 */
-	if (v == &cell->vl_list) {
-		seq_puts(m, "USE STT VLID[0]  VLID[1]  VLID[2]  NAME\n");
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "UUID                                 REF ACT CELL\n");
 		return 0;
 	}
 
-	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %s %08x %08x %08x %s\n",
-		   atomic_read(&vlocation->usage),
-		   afs_vlocation_states[vlocation->state],
-		   vlocation->vldb.vid[0],
-		   vlocation->vldb.vid[1],
-		   vlocation->vldb.vid[2],
-		   vlocation->vldb.name);
+	server = list_entry(v, struct afs_server, proc_link);
+	seq_printf(m, "%pU %3d %3d %s\n",
+		   &server->uuid,
+		   refcount_read(&server->ref),
+		   atomic_read(&server->active),
+		   server->cell->name);
+	seq_printf(m, "  - info: fl=%lx rtt=%u\n",
+		   server->flags, server->rtt);
+	seq_printf(m, "  - probe: last=%d\n",
+		   (int)(jiffies - server->probed_at) / HZ);
+
+	estate = rcu_dereference(server->endpoint_state);
+	if (!estate)
+		goto out;
+	failed = estate->failed_set;
+	seq_printf(m, "  - ESTATE pq=%x np=%u rsp=%lx f=%lx\n",
+		   estate->probe_seq, atomic_read(&estate->nr_probing),
+		   estate->responsive_set, estate->failed_set);
+
+	alist = estate->addresses;
+	seq_printf(m, "  - ALIST v=%u ap=%u\n",
+		   alist->version, alist->addr_pref_version);
+	for (i = 0; i < alist->nr_addrs; i++) {
+		const struct afs_address *addr = &alist->addrs[i];
+
+		seq_printf(m, "    [%x] %pISpc%s rtt=%d err=%d p=%u\n",
+			   i, rxrpc_kernel_remote_addr(addr->peer),
+			   alist->preferred == i ? "*" :
+			   test_bit(i, &failed) ? "!" : "",
+			   rxrpc_kernel_get_srtt(addr->peer),
+			   addr->last_error, addr->prio);
+	}
 
+out:
 	return 0;
 }
 
-/*
- * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
- * location server
- */
-static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
+static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
-
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
-
-	ret = seq_open(file, &afs_proc_cell_vlservers_ops);
-	if (ret<0)
-		return ret;
-
-	m = file->private_data;
-	m->private = cell;
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&afs_seq2net(m)->fs_proc, *_pos);
+}
 
-	return 0;
+static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	return seq_hlist_next_rcu(v, &afs_seq2net(m)->fs_proc, _pos);
 }
 
-/*
- * close the file and release the ref to the cell
- */
-static int afs_proc_cell_vlservers_release(struct inode *inode,
-					   struct file *file)
+static void afs_proc_servers_stop(struct seq_file *m, void *v)
+	__releases(rcu)
 {
-	return seq_release(inode, file);
+	rcu_read_unlock();
 }
 
+static const struct seq_operations afs_proc_servers_ops = {
+	.start	= afs_proc_servers_start,
+	.next	= afs_proc_servers_next,
+	.stop	= afs_proc_servers_stop,
+	.show	= afs_proc_servers_show,
+};
+
 /*
- * set up the iterator to start reading from the cells list and return the
- * first item
+ * Display the list of strings that may be substituted for the @sys pathname
+ * macro.
  */
-static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+static int afs_proc_sysname_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell = m->private;
-	loff_t pos = *_pos;
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *sysnames = net->sysnames;
+	unsigned int i = (unsigned long)v - 1;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	if (i < sysnames->nr)
+		seq_printf(m, "%s\n", sysnames->subs[i]);
+	return 0;
+}
 
-	/* lock the list against modification */
-	down_read(&cell->vl_sem);
+static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos)
+	__acquires(&net->sysnames_lock)
+{
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *names;
 
-	/* allow for the header line */
-	if (!pos)
-		return (void *) 1;
-	pos--;
+	read_lock(&net->sysnames_lock);
 
-	if (pos >= cell->vl_naddrs)
+	names = net->sysnames;
+	if (*pos >= names->nr)
 		return NULL;
-
-	return &cell->vl_addrs[pos];
+	return (void *)(unsigned long)(*pos + 1);
 }
 
-/*
- * move to next cell in cells list
- */
-static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
-					  loff_t *_pos)
+static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct afs_cell *cell = p->private;
-	loff_t pos;
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *names = net->sysnames;
 
-	_enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos);
-
-	pos = *_pos;
-	(*_pos)++;
-	if (pos >= cell->vl_naddrs)
+	*pos += 1;
+	if (*pos >= names->nr)
 		return NULL;
-
-	return &cell->vl_addrs[pos];
+	return (void *)(unsigned long)(*pos + 1);
 }
 
-/*
- * clean up after reading from the cells list
- */
-static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
+static void afs_proc_sysname_stop(struct seq_file *m, void *v)
+	__releases(&net->sysnames_lock)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_net *net = afs_seq2net(m);
 
-	up_read(&cell->vl_sem);
+	read_unlock(&net->sysnames_lock);
 }
 
+static const struct seq_operations afs_proc_sysname_ops = {
+	.start	= afs_proc_sysname_start,
+	.next	= afs_proc_sysname_next,
+	.stop	= afs_proc_sysname_stop,
+	.show	= afs_proc_sysname_show,
+};
+
 /*
- * display a header line followed by a load of volume lines
+ * Allow the @sys substitution to be configured.
  */
-static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
+static int afs_proc_sysname_write(struct file *file, char *buf, size_t size)
 {
-	struct in_addr *addr = v;
+	struct afs_sysnames *sysnames, *kill;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net(m);
+	char *s, *p, *sub;
+	int ret, len;
 
-	/* display header on line 1 */
-	if (v == (struct in_addr *) 1) {
-		seq_puts(m, "ADDRESS\n");
-		return 0;
+	sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+	if (!sysnames)
+		return -ENOMEM;
+	refcount_set(&sysnames->usage, 1);
+	kill = sysnames;
+
+	p = buf;
+	while ((s = strsep(&p, " \t\n"))) {
+		len = strlen(s);
+		if (len == 0)
+			continue;
+		ret = -ENAMETOOLONG;
+		if (len >= AFSNAMEMAX)
+			goto error;
+
+		if (len >= 4 &&
+		    s[len - 4] == '@' &&
+		    s[len - 3] == 's' &&
+		    s[len - 2] == 'y' &&
+		    s[len - 1] == 's')
+			/* Protect against recursion */
+			goto invalid;
+
+		if (s[0] == '.' &&
+		    (len < 2 || (len == 2 && s[1] == '.')))
+			goto invalid;
+
+		if (memchr(s, '/', len))
+			goto invalid;
+
+		ret = -EFBIG;
+		if (sysnames->nr >= AFS_NR_SYSNAME)
+			goto out;
+
+		if (strcmp(s, afs_init_sysname) == 0) {
+			sub = (char *)afs_init_sysname;
+		} else {
+			ret = -ENOMEM;
+			sub = kmemdup(s, len + 1, GFP_KERNEL);
+			if (!sub)
+				goto out;
+		}
+
+		sysnames->subs[sysnames->nr] = sub;
+		sysnames->nr++;
 	}
 
-	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%pI4\n", &addr->s_addr);
-	return 0;
+	if (sysnames->nr == 0) {
+		sysnames->subs[0] = sysnames->blank;
+		sysnames->nr++;
+	}
+
+	write_lock(&net->sysnames_lock);
+	kill = net->sysnames;
+	net->sysnames = sysnames;
+	write_unlock(&net->sysnames_lock);
+	ret = 0;
+out:
+	afs_put_sysnames(kill);
+	return ret;
+
+invalid:
+	ret = -EINVAL;
+error:
+	goto out;
+}
+
+void afs_put_sysnames(struct afs_sysnames *sysnames)
+{
+	int i;
+
+	if (sysnames && refcount_dec_and_test(&sysnames->usage)) {
+		for (i = 0; i < sysnames->nr; i++)
+			if (sysnames->subs[i] != afs_init_sysname &&
+			    sysnames->subs[i] != sysnames->blank)
+				kfree(sysnames->subs[i]);
+		kfree(sysnames);
+	}
 }
 
 /*
- * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
- * servers
+ * Display general per-net namespace statistics
  */
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
+static int afs_proc_stats_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
+	struct afs_net *net = afs_seq2net_single(m);
+
+	seq_puts(m, "kAFS statistics\n");
 
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
+	seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n",
+		   atomic_read(&net->n_lookup),
+		   atomic_read(&net->n_reval),
+		   atomic_read(&net->n_inval),
+		   atomic_read(&net->n_relpg));
 
-	ret = seq_open(file, &afs_proc_cell_servers_ops);
-	if (ret < 0)
-		return ret;
+	seq_printf(m, "dir-data: rdpg=%u\n",
+		   atomic_read(&net->n_read_dir));
 
-	m = file->private_data;
-	m->private = cell;
+	seq_printf(m, "dir-edit: cr=%u rm=%u\n",
+		   atomic_read(&net->n_dir_cr),
+		   atomic_read(&net->n_dir_rm));
+
+	seq_printf(m, "file-rd : n=%u nb=%lu\n",
+		   atomic_read(&net->n_fetches),
+		   atomic_long_read(&net->n_fetch_bytes));
+	seq_printf(m, "file-wr : n=%u nb=%lu\n",
+		   atomic_read(&net->n_stores),
+		   atomic_long_read(&net->n_store_bytes));
 	return 0;
 }
 
 /*
- * close the file and release the ref to the cell
+ * initialise /proc/fs/afs/<cell>/
  */
-static int afs_proc_cell_servers_release(struct inode *inode,
-					 struct file *file)
+int afs_proc_cell_setup(struct afs_cell *cell)
 {
-	return seq_release(inode, file);
-}
+	struct proc_dir_entry *dir;
+	struct afs_net *net = cell->net;
 
-/*
- * set up the iterator to start reading from the cells list and return the
- * first item
- */
-static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
-	__acquires(m->private->servers_lock)
-{
-	struct afs_cell *cell = m->private;
+	_enter("%p{%s},%p", cell, cell->name, net->proc_afs);
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
+	dir = proc_net_mkdir(net->net, cell->name, net->proc_afs);
+	if (!dir)
+		goto error_dir;
 
-	/* lock the list against modification */
-	read_lock(&cell->servers_lock);
-	return seq_list_start_head(&cell->servers, *_pos);
-}
+	if (!proc_create_net_data("vlservers", 0444, dir,
+				  &afs_proc_cell_vlservers_ops,
+				  sizeof(struct afs_vl_seq_net_private),
+				  cell) ||
+	    !proc_create_net_data("volumes", 0444, dir,
+				  &afs_proc_cell_volumes_ops,
+				  sizeof(struct seq_net_private),
+				  cell))
+		goto error_tree;
 
-/*
- * move to next cell in cells list
- */
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
-					loff_t *_pos)
-{
-	struct afs_cell *cell = p->private;
+	_leave(" = 0");
+	return 0;
 
-	_enter("cell=%p pos=%Ld", cell, *_pos);
-	return seq_list_next(v, &cell->servers, _pos);
+error_tree:
+	remove_proc_subtree(cell->name, net->proc_afs);
+error_dir:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
 }
 
 /*
- * clean up after reading from the cells list
+ * remove /proc/fs/afs/<cell>/
  */
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
-	__releases(p->private->servers_lock)
+void afs_proc_cell_remove(struct afs_cell *cell)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_net *net = cell->net;
 
-	read_unlock(&cell->servers_lock);
+	_enter("");
+	remove_proc_subtree(cell->name, net->proc_afs);
+	_leave("");
 }
 
 /*
- * display a header line followed by a load of volume lines
+ * initialise the /proc/fs/afs/ directory
  */
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
+int afs_proc_init(struct afs_net *net)
 {
-	struct afs_cell *cell = m->private;
-	struct afs_server *server = list_entry(v, struct afs_server, link);
-	char ipaddr[20];
+	struct proc_dir_entry *p;
 
-	/* display header on line 1 */
-	if (v == &cell->servers) {
-		seq_puts(m, "USE ADDR            STATE\n");
-		return 0;
-	}
+	_enter("");
 
-	/* display one cell per line on subsequent lines */
-	sprintf(ipaddr, "%pI4", &server->addr);
-	seq_printf(m, "%3d %-15.15s %5d\n",
-		   atomic_read(&server->usage), ipaddr, server->fs_state);
+	p = proc_net_mkdir(net->net, "afs", net->net->proc_net);
+	if (!p)
+		goto error_dir;
 
+	if (!proc_create_net_data_write("cells", 0644, p,
+					&afs_proc_cells_ops,
+					afs_proc_cells_write,
+					sizeof(struct seq_net_private),
+					NULL) ||
+	    !proc_create_net_single_write("rootcell", 0644, p,
+					  afs_proc_rootcell_show,
+					  afs_proc_rootcell_write,
+					  NULL) ||
+	    !proc_create_net("servers", 0444, p, &afs_proc_servers_ops,
+			     sizeof(struct seq_net_private)) ||
+	    !proc_create_net_single("stats", 0444, p, afs_proc_stats_show, NULL) ||
+	    !proc_create_net_data_write("sysname", 0644, p,
+					&afs_proc_sysname_ops,
+					afs_proc_sysname_write,
+					sizeof(struct seq_net_private),
+					NULL) ||
+	    !proc_create_net_single_write("addr_prefs", 0644, p,
+					  afs_proc_addr_prefs_show,
+					  afs_proc_addr_prefs_write,
+					  NULL))
+		goto error_tree;
+
+	net->proc_afs = p;
+	_leave(" = 0");
 	return 0;
+
+error_tree:
+	proc_remove(p);
+error_dir:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/afs/ directory
+ */
+void afs_proc_cleanup(struct afs_net *net)
+{
+	proc_remove(net->proc_afs);
+	net->proc_afs = NULL;
 }
diff --git a/fs/afs/protocol_afs.h b/fs/afs/protocol_afs.h
new file mode 100644
index 000000000000..0c39358c8b70
--- /dev/null
+++ b/fs/afs/protocol_afs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS protocol bits
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+
+#define AFSCAPABILITIESMAX 196 /* Maximum number of words in a capability set */
+
+/* AFS3 Fileserver capabilities word 0 */
+#define AFS3_VICED_CAPABILITY_ERRORTRANS	0x0001 /* Uses UAE errors */
+#define AFS3_VICED_CAPABILITY_64BITFILES	0x0002 /* FetchData64 & StoreData64 supported */
+#define AFS3_VICED_CAPABILITY_WRITELOCKACL	0x0004 /* Can lock a file even without lock perm */
+#define AFS3_VICED_CAPABILITY_SANEACLS		0x0008 /* ACLs reviewed for sanity - don't use */
diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h
new file mode 100644
index 000000000000..1b3d1060bd34
--- /dev/null
+++ b/fs/afs/protocol_uae.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Universal AFS Error codes (UAE).
+ *
+ * Copyright (C) 2003, Daria Phoebe Brashear
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ */
+
+enum {
+	UAEPERM			= 0x2f6df00, /* Operation not permitted */
+	UAENOENT		= 0x2f6df01, /* No such file or directory */
+	UAESRCH			= 0x2f6df02, /* No such process */
+	UAEINTR			= 0x2f6df03, /* Interrupted system call */
+	UAEIO			= 0x2f6df04, /* I/O error */
+	UAENXIO			= 0x2f6df05, /* No such device or address */
+	UAE2BIG			= 0x2f6df06, /* Arg list too long */
+	UAENOEXEC		= 0x2f6df07, /* Exec format error */
+	UAEBADF			= 0x2f6df08, /* Bad file number */
+	UAECHILD		= 0x2f6df09, /* No child processes */
+	UAEAGAIN		= 0x2f6df0a, /* Try again */
+	UAENOMEM		= 0x2f6df0b, /* Out of memory */
+	UAEACCES		= 0x2f6df0c, /* Permission denied */
+	UAEFAULT		= 0x2f6df0d, /* Bad address */
+	UAENOTBLK		= 0x2f6df0e, /* Block device required */
+	UAEBUSY			= 0x2f6df0f, /* Device or resource busy */
+	UAEEXIST		= 0x2f6df10, /* File exists */
+	UAEXDEV			= 0x2f6df11, /* Cross-device link */
+	UAENODEV		= 0x2f6df12, /* No such device */
+	UAENOTDIR		= 0x2f6df13, /* Not a directory */
+	UAEISDIR		= 0x2f6df14, /* Is a directory */
+	UAEINVAL		= 0x2f6df15, /* Invalid argument */
+	UAENFILE		= 0x2f6df16, /* File table overflow */
+	UAEMFILE		= 0x2f6df17, /* Too many open files */
+	UAENOTTY		= 0x2f6df18, /* Not a typewriter */
+	UAETXTBSY		= 0x2f6df19, /* Text file busy */
+	UAEFBIG			= 0x2f6df1a, /* File too large */
+	UAENOSPC		= 0x2f6df1b, /* No space left on device */
+	UAESPIPE		= 0x2f6df1c, /* Illegal seek */
+	UAEROFS			= 0x2f6df1d, /* Read-only file system */
+	UAEMLINK		= 0x2f6df1e, /* Too many links */
+	UAEPIPE			= 0x2f6df1f, /* Broken pipe */
+	UAEDOM			= 0x2f6df20, /* Math argument out of domain of func */
+	UAERANGE		= 0x2f6df21, /* Math result not representable */
+	UAEDEADLK		= 0x2f6df22, /* Resource deadlock would occur */
+	UAENAMETOOLONG		= 0x2f6df23, /* File name too long */
+	UAENOLCK		= 0x2f6df24, /* No record locks available */
+	UAENOSYS		= 0x2f6df25, /* Function not implemented */
+	UAENOTEMPTY		= 0x2f6df26, /* Directory not empty */
+	UAELOOP			= 0x2f6df27, /* Too many symbolic links encountered */
+	UAEWOULDBLOCK		= 0x2f6df28, /* Operation would block */
+	UAENOMSG		= 0x2f6df29, /* No message of desired type */
+	UAEIDRM			= 0x2f6df2a, /* Identifier removed */
+	UAECHRNG		= 0x2f6df2b, /* Channel number out of range */
+	UAEL2NSYNC		= 0x2f6df2c, /* Level 2 not synchronized */
+	UAEL3HLT		= 0x2f6df2d, /* Level 3 halted */
+	UAEL3RST		= 0x2f6df2e, /* Level 3 reset */
+	UAELNRNG		= 0x2f6df2f, /* Link number out of range */
+	UAEUNATCH		= 0x2f6df30, /* Protocol driver not attached */
+	UAENOCSI		= 0x2f6df31, /* No CSI structure available */
+	UAEL2HLT		= 0x2f6df32, /* Level 2 halted */
+	UAEBADE			= 0x2f6df33, /* Invalid exchange */
+	UAEBADR			= 0x2f6df34, /* Invalid request descriptor */
+	UAEXFULL		= 0x2f6df35, /* Exchange full */
+	UAENOANO		= 0x2f6df36, /* No anode */
+	UAEBADRQC		= 0x2f6df37, /* Invalid request code */
+	UAEBADSLT		= 0x2f6df38, /* Invalid slot */
+	UAEBFONT		= 0x2f6df39, /* Bad font file format */
+	UAENOSTR		= 0x2f6df3a, /* Device not a stream */
+	UAENODATA		= 0x2f6df3b, /* No data available */
+	UAETIME			= 0x2f6df3c, /* Timer expired */
+	UAENOSR			= 0x2f6df3d, /* Out of streams resources */
+	UAENONET		= 0x2f6df3e, /* Machine is not on the network */
+	UAENOPKG		= 0x2f6df3f, /* Package not installed */
+	UAEREMOTE		= 0x2f6df40, /* Object is remote */
+	UAENOLINK		= 0x2f6df41, /* Link has been severed */
+	UAEADV			= 0x2f6df42, /* Advertise error */
+	UAESRMNT		= 0x2f6df43, /* Srmount error */
+	UAECOMM			= 0x2f6df44, /* Communication error on send */
+	UAEPROTO		= 0x2f6df45, /* Protocol error */
+	UAEMULTIHOP		= 0x2f6df46, /* Multihop attempted */
+	UAEDOTDOT		= 0x2f6df47, /* RFS specific error */
+	UAEBADMSG		= 0x2f6df48, /* Not a data message */
+	UAEOVERFLOW		= 0x2f6df49, /* Value too large for defined data type */
+	UAENOTUNIQ		= 0x2f6df4a, /* Name not unique on network */
+	UAEBADFD		= 0x2f6df4b, /* File descriptor in bad state */
+	UAEREMCHG		= 0x2f6df4c, /* Remote address changed */
+	UAELIBACC		= 0x2f6df4d, /* Can not access a needed shared library */
+	UAELIBBAD		= 0x2f6df4e, /* Accessing a corrupted shared library */
+	UAELIBSCN		= 0x2f6df4f, /* .lib section in a.out corrupted */
+	UAELIBMAX		= 0x2f6df50, /* Attempting to link in too many shared libraries */
+	UAELIBEXEC		= 0x2f6df51, /* Cannot exec a shared library directly */
+	UAEILSEQ		= 0x2f6df52, /* Illegal byte sequence */
+	UAERESTART		= 0x2f6df53, /* Interrupted system call should be restarted */
+	UAESTRPIPE		= 0x2f6df54, /* Streams pipe error */
+	UAEUSERS		= 0x2f6df55, /* Too many users */
+	UAENOTSOCK		= 0x2f6df56, /* Socket operation on non-socket */
+	UAEDESTADDRREQ		= 0x2f6df57, /* Destination address required */
+	UAEMSGSIZE		= 0x2f6df58, /* Message too long */
+	UAEPROTOTYPE		= 0x2f6df59, /* Protocol wrong type for socket */
+	UAENOPROTOOPT		= 0x2f6df5a, /* Protocol not available */
+	UAEPROTONOSUPPORT	= 0x2f6df5b, /* Protocol not supported */
+	UAESOCKTNOSUPPORT	= 0x2f6df5c, /* Socket type not supported */
+	UAEOPNOTSUPP		= 0x2f6df5d, /* Operation not supported on transport endpoint */
+	UAEPFNOSUPPORT		= 0x2f6df5e, /* Protocol family not supported */
+	UAEAFNOSUPPORT		= 0x2f6df5f, /* Address family not supported by protocol */
+	UAEADDRINUSE		= 0x2f6df60, /* Address already in use */
+	UAEADDRNOTAVAIL		= 0x2f6df61, /* Cannot assign requested address */
+	UAENETDOWN		= 0x2f6df62, /* Network is down */
+	UAENETUNREACH		= 0x2f6df63, /* Network is unreachable */
+	UAENETRESET		= 0x2f6df64, /* Network dropped connection because of reset */
+	UAECONNABORTED		= 0x2f6df65, /* Software caused connection abort */
+	UAECONNRESET		= 0x2f6df66, /* Connection reset by peer */
+	UAENOBUFS		= 0x2f6df67, /* No buffer space available */
+	UAEISCONN		= 0x2f6df68, /* Transport endpoint is already connected */
+	UAENOTCONN		= 0x2f6df69, /* Transport endpoint is not connected */
+	UAESHUTDOWN		= 0x2f6df6a, /* Cannot send after transport endpoint shutdown */
+	UAETOOMANYREFS		= 0x2f6df6b, /* Too many references: cannot splice */
+	UAETIMEDOUT		= 0x2f6df6c, /* Connection timed out */
+	UAECONNREFUSED		= 0x2f6df6d, /* Connection refused */
+	UAEHOSTDOWN		= 0x2f6df6e, /* Host is down */
+	UAEHOSTUNREACH		= 0x2f6df6f, /* No route to host */
+	UAEALREADY		= 0x2f6df70, /* Operation already in progress */
+	UAEINPROGRESS		= 0x2f6df71, /* Operation now in progress */
+	UAESTALE		= 0x2f6df72, /* Stale NFS file handle */
+	UAEUCLEAN		= 0x2f6df73, /* Structure needs cleaning */
+	UAENOTNAM		= 0x2f6df74, /* Not a XENIX named type file */
+	UAENAVAIL		= 0x2f6df75, /* No XENIX semaphores available */
+	UAEISNAM		= 0x2f6df76, /* Is a named type file */
+	UAEREMOTEIO		= 0x2f6df77, /* Remote I/O error */
+	UAEDQUOT		= 0x2f6df78, /* Quota exceeded */
+	UAENOMEDIUM		= 0x2f6df79, /* No medium found */
+	UAEMEDIUMTYPE		= 0x2f6df7a, /* Wrong medium type */
+};
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644
index 000000000000..b2f06c1917c2
--- /dev/null
+++ b/fs/afs/protocol_yfs.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define YFS_FS_SERVICE	2500
+#define YFS_CM_SERVICE	2501
+
+#define YFSCBMAX	1024
+
+enum YFS_CM_Operations {
+	YFSCBProbe		= 206,	/* probe client */
+	YFSCBGetLock		= 207,	/* get contents of CM lock table */
+	YFSCBXStatsVersion	= 209,	/* get version of extended statistics */
+	YFSCBGetXStats		= 210,	/* get contents of extended statistics data */
+	YFSCBInitCallBackState3	= 213,	/* initialise callback state, version 3 */
+	YFSCBProbeUuid		= 214,	/* check the client hasn't rebooted */
+	YFSCBGetServerPrefs	= 215,
+	YFSCBGetCellServDV	= 216,
+	YFSCBGetLocalCell	= 217,
+	YFSCBGetCacheConfig	= 218,
+	YFSCBGetCellByNum	= 65537,
+	YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+	YFSCBCallBack		= 64204,
+};
+
+enum YFS_FS_Operations {
+	YFSFETCHACL		= 64131, /* YFS Fetch file AFS3 ACL */
+	YFSFETCHSTATUS		= 64132, /* YFS Fetch file status */
+	YFSSTOREACL		= 64134, /* YFS Store file AFS3 ACL */
+	YFSSTORESTATUS		= 64135, /* YFS Store file status */
+	YFSREMOVEFILE		= 64136, /* YFS Remove a file */
+	YFSCREATEFILE		= 64137, /* YFS Create a file */
+	YFSRENAME		= 64138, /* YFS Rename or move a file or directory */
+	YFSSYMLINK		= 64139, /* YFS Create a symbolic link */
+	YFSLINK			= 64140, /* YFS Create a hard link */
+	YFSMAKEDIR		= 64141, /* YFS Create a directory */
+	YFSREMOVEDIR		= 64142, /* YFS Remove a directory */
+	YFSGETVOLUMESTATUS	= 64149, /* YFS Get volume status information */
+	YFSSETVOLUMESTATUS	= 64150, /* YFS Set volume status information */
+	YFSSETLOCK		= 64156, /* YFS Request a file lock */
+	YFSEXTENDLOCK		= 64157, /* YFS Extend a file lock */
+	YFSRELEASELOCK		= 64158, /* YFS Release a file lock */
+	YFSLOOKUP		= 64161, /* YFS lookup file in directory */
+	YFSFLUSHCPS		= 64165,
+	YFSFETCHOPAQUEACL	= 64168, /* YFS Fetch file YFS ACL */
+	YFSWHOAMI		= 64170,
+	YFSREMOVEACL		= 64171,
+	YFSREMOVEFILE2		= 64173,
+	YFSSTOREOPAQUEACL2	= 64174,
+	YFSRENAME_REPLACE	= 64176,
+	YFSRENAME_NOREPLACE	= 64177,
+	YFSRENAME_EXCHANGE	= 64187,
+	YFSINLINEBULKSTATUS	= 64536, /* YFS Fetch multiple file statuses with errors */
+	YFSFETCHDATA64		= 64537, /* YFS Fetch file data */
+	YFSSTOREDATA64		= 64538, /* YFS Store file data */
+	YFSUPDATESYMLINK	= 64540,
+};
+
+struct yfs_xdr_u64 {
+	__be32			msw;
+	__be32			lsw;
+} __packed;
+
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+	return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw);
+}
+
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+	return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) };
+}
+
+struct yfs_xdr_vnode {
+	struct yfs_xdr_u64	lo;
+	__be32			hi;
+	__be32			unique;
+} __packed;
+
+struct yfs_xdr_YFSFid {
+	struct yfs_xdr_u64	volume;
+	struct yfs_xdr_vnode	vnode;
+} __packed;
+
+
+struct yfs_xdr_YFSFetchStatus {
+	__be32			type;
+	__be32			nlink;
+	struct yfs_xdr_u64	size;
+	struct yfs_xdr_u64	data_version;
+	struct yfs_xdr_u64	author;
+	struct yfs_xdr_u64	owner;
+	struct yfs_xdr_u64	group;
+	__be32			mode;
+	__be32			caller_access;
+	__be32			anon_access;
+	struct yfs_xdr_vnode	parent;
+	__be32			data_access_protocol;
+	struct yfs_xdr_u64	mtime_client;
+	struct yfs_xdr_u64	mtime_server;
+	__be32			lock_count;
+	__be32			abort_code;
+} __packed;
+
+struct yfs_xdr_YFSCallBack {
+	__be32			version;
+	struct yfs_xdr_u64	expiration_time;
+	__be32			type;
+} __packed;
+
+struct yfs_xdr_YFSStoreStatus {
+	__be32			mask;
+	__be32			mode;
+	struct yfs_xdr_u64	mtime_client;
+	struct yfs_xdr_u64	owner;
+	struct yfs_xdr_u64	group;
+} __packed;
+
+struct yfs_xdr_RPCFlags {
+	__be32			rpc_flags;
+} __packed;
+
+struct yfs_xdr_YFSVolSync {
+	struct yfs_xdr_u64	vol_creation_date;
+	struct yfs_xdr_u64	vol_update_date;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	blocks_in_use;
+	struct yfs_xdr_u64	blocks_avail;
+} __packed;
+
+enum yfs_volume_type {
+	yfs_volume_type_ro = 0,
+	yfs_volume_type_rw = 1,
+};
+
+#define yfs_FVSOnline		0x1
+#define yfs_FVSInservice	0x2
+#define yfs_FVSBlessed		0x4
+#define yfs_FVSNeedsSalvage	0x8
+
+struct yfs_xdr_YFSFetchVolumeStatus {
+	struct yfs_xdr_u64	vid;
+	struct yfs_xdr_u64	parent_id;
+	__be32			flags;
+	__be32			type;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	blocks_in_use;
+	struct yfs_xdr_u64	part_blocks_avail;
+	struct yfs_xdr_u64	part_max_blocks;
+	struct yfs_xdr_u64	vol_copy_date;
+	struct yfs_xdr_u64	vol_backup_date;
+} __packed;
+
+struct yfs_xdr_YFSStoreVolumeStatus {
+	__be32			mask;
+	struct yfs_xdr_u64	min_quota;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	file_quota;
+} __packed;
+
+enum yfs_lock_type {
+	yfs_LockNone		= -1,
+	yfs_LockRead		= 0,
+	yfs_LockWrite		= 1,
+	yfs_LockExtend		= 2,
+	yfs_LockRelease		= 3,
+	yfs_LockMandatoryRead	= 0x100,
+	yfs_LockMandatoryWrite	= 0x101,
+	yfs_LockMandatoryExtend	= 0x102,
+};
+
+/* RXYFS Viced Capability Flags */
+#define YFS_VICED_CAPABILITY_ERRORTRANS		0x0001 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_64BITFILES		0x0002 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_WRITELOCKACL	0x0004 /* Can lock a file even without lock perm */
+#define YFS_VICED_CAPABILITY_SANEACLS		0x0008 /* Deprecated v0.195 */
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
new file mode 100644
index 000000000000..6a4e7da10fc4
--- /dev/null
+++ b/fs/afs/rotate.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Handle fileserver selection and rotation.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "protocol_uae.h"
+
+void afs_clear_server_states(struct afs_operation *op)
+{
+	unsigned int i;
+
+	if (op->server_states) {
+		for (i = 0; i < op->server_list->nr_servers; i++)
+			afs_put_endpoint_state(op->server_states[i].endpoint_state,
+					       afs_estate_trace_put_server_state);
+		kfree(op->server_states);
+	}
+}
+
+/*
+ * Begin iteration through a server list, starting with the vnode's last used
+ * server if possible, or the last recorded good server if not.
+ */
+static bool afs_start_fs_iteration(struct afs_operation *op,
+				   struct afs_vnode *vnode)
+{
+	struct afs_server *server;
+	void *cb_server;
+	int i;
+
+	trace_afs_rotate(op, afs_rotate_trace_start, 0);
+
+	read_lock(&op->volume->servers_lock);
+	op->server_list = afs_get_serverlist(
+		rcu_dereference_protected(op->volume->servers,
+					  lockdep_is_held(&op->volume->servers_lock)));
+	read_unlock(&op->volume->servers_lock);
+
+	op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
+				    GFP_KERNEL);
+	if (!op->server_states) {
+		afs_op_nomem(op);
+		trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
+		return false;
+	}
+
+	rcu_read_lock();
+	for (i = 0; i < op->server_list->nr_servers; i++) {
+		struct afs_endpoint_state *estate;
+		struct afs_server_state *s = &op->server_states[i];
+
+		server = op->server_list->servers[i].server;
+		estate = rcu_dereference(server->endpoint_state);
+		s->endpoint_state = afs_get_endpoint_state(estate,
+							   afs_estate_trace_get_server_state);
+		s->probe_seq = estate->probe_seq;
+		s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
+		init_waitqueue_entry(&s->probe_waiter, current);
+		afs_get_address_preferences(op->net, estate->addresses);
+	}
+	rcu_read_unlock();
+
+
+	op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
+	op->server_index = -1;
+
+	cb_server = vnode->cb_server;
+	if (cb_server) {
+		/* See if the vnode's preferred record is still available */
+		for (i = 0; i < op->server_list->nr_servers; i++) {
+			server = op->server_list->servers[i].server;
+			if (server == cb_server) {
+				op->server_index = i;
+				goto found_interest;
+			}
+		}
+
+		/* If we have a lock outstanding on a server that's no longer
+		 * serving this vnode, then we can't switch to another server
+		 * and have to return an error.
+		 */
+		if (op->flags & AFS_OPERATION_CUR_ONLY) {
+			afs_op_set_error(op, -ESTALE);
+			trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
+			return false;
+		}
+
+		/* Note that the callback promise is effectively broken */
+		write_seqlock(&vnode->cb_lock);
+		ASSERTCMP(cb_server, ==, vnode->cb_server);
+		vnode->cb_server = NULL;
+		if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
+			vnode->cb_break++;
+		write_sequnlock(&vnode->cb_lock);
+	}
+
+found_interest:
+	return true;
+}
+
+/*
+ * Post volume busy note.
+ */
+static void afs_busy(struct afs_operation *op, u32 abort_code)
+{
+	const char *m;
+
+	switch (abort_code) {
+	case VOFFLINE:		m = "offline";		break;
+	case VRESTARTING:	m = "restarting";	break;
+	case VSALVAGING:	m = "being salvaged";	break;
+	default:		m = "busy";		break;
+	}
+
+	pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
+		  op->volume->vid, op->volume->name, &op->server->uuid, m);
+}
+
+/*
+ * Sleep and retry the operation to the same fileserver.
+ */
+static bool afs_sleep_and_retry(struct afs_operation *op)
+{
+	trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
+	if (!(op->flags & AFS_OPERATION_UNINTR)) {
+		msleep_interruptible(1000);
+		if (signal_pending(current)) {
+			afs_op_set_error(op, -ERESTARTSYS);
+			return false;
+		}
+	} else {
+		msleep(1000);
+	}
+
+	return true;
+}
+
+/*
+ * Select the fileserver to use.  May be called multiple times to rotate
+ * through the fileservers.
+ */
+bool afs_select_fileserver(struct afs_operation *op)
+{
+	struct afs_addr_list *alist;
+	struct afs_server *server;
+	struct afs_vnode *vnode = op->file[0].vnode;
+	unsigned long set, failed;
+	s32 abort_code = op->call_abort_code;
+	int best_prio = 0;
+	int error = op->call_error, addr_index, i, j;
+
+	op->nr_iterations++;
+
+	_enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
+	       op->debug_id, op->nr_iterations, op->volume->vid,
+	       op->server_index, op->untried_servers,
+	       op->addr_index, op->addr_tried,
+	       error, abort_code);
+
+	if (op->flags & AFS_OPERATION_STOP) {
+		trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
+		_leave(" = f [stopped]");
+		return false;
+	}
+
+	if (op->nr_iterations == 0)
+		goto start;
+
+	WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
+	trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
+
+	/* Evaluate the result of the previous operation, if there was one. */
+	switch (op->call_error) {
+	case 0:
+		clear_bit(AFS_SE_VOLUME_OFFLINE,
+			  &op->server_list->servers[op->server_index].flags);
+		clear_bit(AFS_SE_VOLUME_BUSY,
+			  &op->server_list->servers[op->server_index].flags);
+		op->cumul_error.responded = true;
+
+		/* We succeeded, but we may need to redo the op from another
+		 * server if we're looking at a set of RO volumes where some of
+		 * the servers have not yet been brought up to date lest we
+		 * regress the data.  We only switch to the new version once
+		 * >=50% of the servers are updated.
+		 */
+		error = afs_update_volume_state(op);
+		if (error != 0) {
+			if (error == 1) {
+				afs_sleep_and_retry(op);
+				goto restart_from_beginning;
+			}
+			afs_op_set_error(op, error);
+			goto failed;
+		}
+		fallthrough;
+	default:
+		/* Success or local failure.  Stop. */
+		afs_op_set_error(op, error);
+		op->flags |= AFS_OPERATION_STOP;
+		trace_afs_rotate(op, afs_rotate_trace_stop, error);
+		_leave(" = f [okay/local %d]", error);
+		return false;
+
+	case -ECONNABORTED:
+		/* The far side rejected the operation on some grounds.  This
+		 * might involve the server being busy or the volume having been moved.
+		 *
+		 * Note that various V* errors should not be sent to a cache manager
+		 * by a fileserver as they should be translated to more modern UAE*
+		 * errors instead.  IBM AFS and OpenAFS fileservers, however, do leak
+		 * these abort codes.
+		 */
+		trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
+		op->cumul_error.responded = true;
+		switch (abort_code) {
+		case VNOVOL:
+			/* This fileserver doesn't know about the volume.
+			 * - May indicate that the VL is wrong - retry once and compare
+			 *   the results.
+			 * - May indicate that the fileserver couldn't attach to the vol.
+			 * - The volume might have been temporarily removed so that it can
+			 *   be replaced by a volume restore.  "vos" might have ended one
+			 *   transaction and has yet to create the next.
+			 * - The volume might not be blessed or might not be in-service
+			 *   (administrative action).
+			 */
+			if (op->flags & AFS_OPERATION_VNOVOL) {
+				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+				goto next_server;
+			}
+
+			write_lock(&op->volume->servers_lock);
+			op->server_list->vnovol_mask |= 1 << op->server_index;
+			write_unlock(&op->volume->servers_lock);
+
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
+			error = afs_check_volume_status(op->volume, op);
+			if (error < 0) {
+				afs_op_set_error(op, error);
+				goto failed;
+			}
+
+			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
+				afs_op_set_error(op, -ENOMEDIUM);
+				goto failed;
+			}
+
+			/* If the server list didn't change, then assume that
+			 * it's the fileserver having trouble.
+			 */
+			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
+				afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+				goto next_server;
+			}
+
+			/* Try again */
+			op->flags |= AFS_OPERATION_VNOVOL;
+			_leave(" = t [vnovol]");
+			return true;
+
+		case VVOLEXISTS:
+		case VONLINE:
+			/* These should not be returned from the fileserver. */
+			pr_warn("Fileserver returned unexpected abort %d\n",
+				abort_code);
+			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+			goto next_server;
+
+		case VNOSERVICE:
+			/* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
+			 * if the volume was neither in-service nor administratively
+			 * blessed.  All usage was replaced by VNOVOL because AFS 3.1 and
+			 * earlier cache managers did not handle VNOSERVICE and assumed
+			 * it was the client OSes errno 105.
+			 *
+			 * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
+			 * fileserver idle dead time error which was sent in place of
+			 * RX_CALL_TIMEOUT (-3).  The error was intended to be sent if the
+			 * fileserver took too long to send a reply to the client.
+			 * RX_CALL_TIMEOUT would have caused the cache manager to mark the
+			 * server down whereas VNOSERVICE since AFS 3.2 would cause cache
+			 * manager to temporarily (up to 15 minutes) mark the volume
+			 * instance as unusable.
+			 *
+			 * The idle dead logic resulted in cache inconsistency since a
+			 * state changing call that the cache manager assumed was dead
+			 * could still be processed to completion by the fileserver.  This
+			 * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
+			 * returned.  However, many 1.4.8 through 1.6.24 fileservers are
+			 * still in existence.
+			 *
+			 * AuriStorFS fileservers have never returned VNOSERVICE.
+			 *
+			 * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
+			 */
+		case RX_CALL_TIMEOUT:
+			afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
+			goto next_server;
+
+		case VSALVAGING: /* This error should not be leaked to cache managers
+				  * but is from OpenAFS demand attach fileservers.
+				  * It should be treated as an alias for VOFFLINE.
+				  */
+		case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
+		case VOFFLINE:
+			/* The volume is in use by the volserver or another volume utility
+			 * for an operation that might alter the contents.  The volume is
+			 * expected to come back but it might take a long time (could be
+			 * days).
+			 */
+			if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
+					      &op->server_list->servers[op->server_index].flags)) {
+				afs_busy(op, abort_code);
+				clear_bit(AFS_SE_VOLUME_BUSY,
+					  &op->server_list->servers[op->server_index].flags);
+			}
+			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
+				afs_op_set_error(op, -EADV);
+				goto failed;
+			}
+			goto busy;
+
+		case VRESTARTING: /* The fileserver is either shutting down or starting up. */
+		case VBUSY:
+			/* The volume is in use by the volserver or another volume
+			 * utility for an operation that is not expected to alter the
+			 * contents of the volume.  VBUSY does not need to be returned
+			 * for a ROVOL or BACKVOL bound to an ITBusy volserver
+			 * transaction.  The fileserver is permitted to continue serving
+			 * content from ROVOLs and BACKVOLs during an ITBusy transaction
+			 * because the content will not change.  However, many fileserver
+			 * releases do return VBUSY for ROVOL and BACKVOL instances under
+			 * many circumstances.
+			 *
+			 * Retry after going round all the servers unless we have a file
+			 * lock we need to maintain.
+			 */
+			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
+				afs_op_set_error(op, -EBUSY);
+				goto failed;
+			}
+			if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
+					      &op->server_list->servers[op->server_index].flags)) {
+				afs_busy(op, abort_code);
+				clear_bit(AFS_SE_VOLUME_OFFLINE,
+					  &op->server_list->servers[op->server_index].flags);
+			}
+		busy:
+			if (op->flags & AFS_OPERATION_CUR_ONLY) {
+				if (!afs_sleep_and_retry(op))
+					goto failed;
+
+				/* Retry with same server & address */
+				_leave(" = t [vbusy]");
+				return true;
+			}
+
+			op->flags |= AFS_OPERATION_VBUSY;
+			goto next_server;
+
+		case VMOVED:
+			/* The volume migrated to another server.  We consider
+			 * consider all locks and callbacks broken and request
+			 * an update from the VLDB.
+			 *
+			 * We also limit the number of VMOVED hops we will
+			 * honour, just in case someone sets up a loop.
+			 */
+			if (op->flags & AFS_OPERATION_VMOVED) {
+				afs_op_set_error(op, -EREMOTEIO);
+				goto failed;
+			}
+			op->flags |= AFS_OPERATION_VMOVED;
+
+			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
+			error = afs_check_volume_status(op->volume, op);
+			if (error < 0) {
+				afs_op_set_error(op, error);
+				goto failed;
+			}
+
+			/* If the server list didn't change, then the VLDB is
+			 * out of sync with the fileservers.  This is hopefully
+			 * a temporary condition, however, so we don't want to
+			 * permanently block access to the file.
+			 *
+			 * TODO: Try other fileservers if we can.
+			 *
+			 * TODO: Retry a few times with sleeps.
+			 */
+			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
+				afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
+				goto failed;
+			}
+
+			goto restart_from_beginning;
+
+		case UAEIO:
+		case VIO:
+			afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
+			if (op->volume->type != AFSVL_RWVOL)
+				goto next_server;
+			goto failed;
+
+		case VDISKFULL:
+		case UAENOSPC:
+			/* The partition is full.  Only applies to RWVOLs.
+			 * Translate locally and return ENOSPC.
+			 * No replicas to failover to.
+			 */
+			afs_op_set_error(op, -ENOSPC);
+			goto failed_but_online;
+
+		case VOVERQUOTA:
+		case UAEDQUOT:
+			/* Volume is full.  Only applies to RWVOLs.
+			 * Translate locally and return EDQUOT.
+			 * No replicas to failover to.
+			 */
+			afs_op_set_error(op, -EDQUOT);
+			goto failed_but_online;
+
+		case RX_INVALID_OPERATION:
+		case RXGEN_OPCODE:
+			/* Handle downgrading to an older operation. */
+			afs_op_set_error(op, -ENOTSUPP);
+			if (op->flags & AFS_OPERATION_DOWNGRADE) {
+				op->flags &= ~AFS_OPERATION_DOWNGRADE;
+				goto go_again;
+			}
+			goto failed_but_online;
+
+		default:
+			afs_op_accumulate_error(op, error, abort_code);
+		failed_but_online:
+			clear_bit(AFS_SE_VOLUME_OFFLINE,
+				  &op->server_list->servers[op->server_index].flags);
+			clear_bit(AFS_SE_VOLUME_BUSY,
+				  &op->server_list->servers[op->server_index].flags);
+			goto failed;
+		}
+
+	case -ETIMEDOUT:
+	case -ETIME:
+		if (afs_op_error(op) != -EDESTADDRREQ)
+			goto iterate_address;
+		fallthrough;
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
+	case -ECONNREFUSED:
+		_debug("no conn");
+		afs_op_accumulate_error(op, error, 0);
+		goto iterate_address;
+
+	case -ENETRESET:
+		pr_warn("kAFS: Peer reset %s (op=%x)\n",
+			op->type ? op->type->name : "???", op->debug_id);
+		fallthrough;
+	case -ECONNRESET:
+		_debug("call reset");
+		afs_op_set_error(op, error);
+		goto failed;
+	}
+
+restart_from_beginning:
+	trace_afs_rotate(op, afs_rotate_trace_restart, 0);
+	_debug("restart");
+	op->estate = NULL;
+	op->server = NULL;
+	afs_clear_server_states(op);
+	op->server_states = NULL;
+	afs_put_serverlist(op->net, op->server_list);
+	op->server_list = NULL;
+start:
+	_debug("start");
+	ASSERTCMP(op->estate, ==, NULL);
+	/* See if we need to do an update of the volume record.  Note that the
+	 * volume may have moved or even have been deleted.
+	 */
+	error = afs_check_volume_status(op->volume, op);
+	trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
+	if (error < 0) {
+		afs_op_set_error(op, error);
+		goto failed;
+	}
+
+	if (!afs_start_fs_iteration(op, vnode))
+		goto failed;
+
+	_debug("__ VOL %llx __", op->volume->vid);
+
+pick_server:
+	_debug("pick [%lx]", op->untried_servers);
+	ASSERTCMP(op->estate, ==, NULL);
+
+	error = afs_wait_for_fs_probes(op, op->server_states,
+				       !(op->flags & AFS_OPERATION_UNINTR));
+	switch (error) {
+	case 0: /* No untried responsive servers and no outstanding probes */
+		trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
+		goto no_more_servers;
+	case 1: /* Got a response */
+		trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
+		break;
+	case 2: /* Probe data superseded */
+		trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
+		goto restart_from_beginning;
+	default:
+		trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
+		afs_op_set_error(op, error);
+		goto failed;
+	}
+
+	/* Pick the untried server with the highest priority untried endpoint.
+	 * If we have outstanding callbacks, we stick with the server we're
+	 * already using if we can.
+	 */
+	if (op->server) {
+		_debug("server %u", op->server_index);
+		if (test_bit(op->server_index, &op->untried_servers))
+			goto selected_server;
+		op->server = NULL;
+		_debug("no server");
+	}
+
+	rcu_read_lock();
+	op->server_index = -1;
+	best_prio = -1;
+	for (i = 0; i < op->server_list->nr_servers; i++) {
+		struct afs_endpoint_state *es;
+		struct afs_server_entry *se = &op->server_list->servers[i];
+		struct afs_addr_list *sal;
+		struct afs_server *s = se->server;
+
+		if (!test_bit(i, &op->untried_servers) ||
+		    test_bit(AFS_SE_EXCLUDED, &se->flags) ||
+		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
+			continue;
+		es = op->server_states[i].endpoint_state;
+		sal = es->addresses;
+
+		afs_get_address_preferences_rcu(op->net, sal);
+		for (j = 0; j < sal->nr_addrs; j++) {
+			if (es->failed_set & (1 << j))
+				continue;
+			if (!sal->addrs[j].peer)
+				continue;
+			if (sal->addrs[j].prio > best_prio) {
+				op->server_index = i;
+				best_prio = sal->addrs[j].prio;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	if (op->server_index == -1)
+		goto no_more_servers;
+
+selected_server:
+	trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
+	_debug("use %d prio %u", op->server_index, best_prio);
+	__clear_bit(op->server_index, &op->untried_servers);
+
+	/* We're starting on a different fileserver from the list.  We need to
+	 * check it, create a callback intercept, find its address list and
+	 * probe its capabilities before we use it.
+	 */
+	ASSERTCMP(op->estate, ==, NULL);
+	server = op->server_list->servers[op->server_index].server;
+
+	if (!afs_check_server_record(op, server, op->key))
+		goto failed;
+
+	_debug("USING SERVER: %pU", &server->uuid);
+
+	op->flags |= AFS_OPERATION_RETRY_SERVER;
+	op->server = server;
+	if (vnode->cb_server != server) {
+		vnode->cb_server = server;
+		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
+		afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
+	}
+
+retry_server:
+	op->addr_tried = 0;
+	op->addr_index = -1;
+
+iterate_address:
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	op->estate = op->server_states[op->server_index].endpoint_state;
+	set = READ_ONCE(op->estate->responsive_set);
+	failed = READ_ONCE(op->estate->failed_set);
+	_debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
+	set &= ~(failed | op->addr_tried);
+	trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
+	if (!set)
+		goto wait_for_more_probe_results;
+
+	alist = op->estate->addresses;
+	best_prio = -1;
+	addr_index = 0;
+	for (i = 0; i < alist->nr_addrs; i++) {
+		if (!(set & (1 << i)))
+			continue;
+		if (alist->addrs[i].prio > best_prio) {
+			addr_index = i;
+			best_prio = alist->addrs[i].prio;
+		}
+	}
+
+	alist->preferred = addr_index;
+
+	op->addr_index = addr_index;
+	set_bit(addr_index, &op->addr_tried);
+
+	_debug("address [%u] %u/%u %pISp",
+	       op->server_index, addr_index, alist->nr_addrs,
+	       rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
+go_again:
+	op->volsync.creation = TIME64_MIN;
+	op->volsync.update = TIME64_MIN;
+	op->call_responded = false;
+	_leave(" = t");
+	return true;
+
+wait_for_more_probe_results:
+	error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+					  !(op->flags & AFS_OPERATION_UNINTR));
+	if (error == 1)
+		goto iterate_address;
+	if (!error)
+		goto restart_from_beginning;
+
+	/* We've now had a failure to respond on all of a server's addresses -
+	 * immediately probe them again and consider retrying the server.
+	 */
+	trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
+	afs_probe_fileserver(op->net, op->server);
+	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
+		error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
+						  !(op->flags & AFS_OPERATION_UNINTR));
+		switch (error) {
+		case 1:
+			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
+			trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
+			goto retry_server;
+		case 0:
+			trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
+			goto restart_from_beginning;
+		case -ERESTARTSYS:
+			afs_op_set_error(op, error);
+			goto failed;
+		case -ETIME:
+		case -EDESTADDRREQ:
+			goto next_server;
+		}
+	}
+
+next_server:
+	trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
+	_debug("next");
+	op->estate = NULL;
+	goto pick_server;
+
+no_more_servers:
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
+	if (op->flags & AFS_OPERATION_VBUSY) {
+		afs_sleep_and_retry(op);
+		op->flags &= ~AFS_OPERATION_VBUSY;
+		goto restart_from_beginning;
+	}
+
+	rcu_read_lock();
+	for (i = 0; i < op->server_list->nr_servers; i++) {
+		struct afs_endpoint_state *estate;
+
+		estate = op->server_states[i].endpoint_state;
+		error = READ_ONCE(estate->error);
+		if (error < 0)
+			afs_op_accumulate_error(op, error, estate->abort_code);
+	}
+	rcu_read_unlock();
+
+failed:
+	trace_afs_rotate(op, afs_rotate_trace_failed, 0);
+	op->flags |= AFS_OPERATION_STOP;
+	op->estate = NULL;
+	_leave(" = f [failed %d]", afs_op_error(op));
+	return false;
+}
+
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+void afs_dump_edestaddrreq(const struct afs_operation *op)
+{
+	static int count;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
+		  op->file[0].cb_break_before,
+		  op->file[1].cb_break_before, op->flags, op->cumul_error.error);
+	pr_notice("OP: ut=%lx ix=%d ni=%u\n",
+		  op->untried_servers, op->server_index, op->nr_iterations);
+	pr_notice("OP: call  er=%d ac=%d r=%u\n",
+		  op->call_error, op->call_abort_code, op->call_responded);
+
+	if (op->server_list) {
+		const struct afs_server_list *sl = op->server_list;
+
+		pr_notice("FC: SL nr=%u vnov=%hx\n",
+			  sl->nr_servers, sl->vnovol_mask);
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_server *s = sl->servers[i].server;
+			const struct afs_endpoint_state *e =
+				rcu_dereference(s->endpoint_state);
+			const struct afs_addr_list *a = e->addresses;
+
+			pr_notice("FC: server fl=%lx av=%u %pU\n",
+				  s->flags, s->addr_version, &s->uuid);
+			pr_notice("FC:  - pq=%x R=%lx F=%lx\n",
+				  e->probe_seq, e->responsive_set, e->failed_set);
+			if (a) {
+				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
+					  a->version,
+					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+					  a->preferred);
+				if (a == e->addresses)
+					pr_notice("FC:  - current\n");
+			}
+		}
+	}
+
+	pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
+	rcu_read_unlock();
+}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 8ad8c2a0703a..bf0e4ea0aafd 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -1,68 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* Maintain an RxRPC server socket to do AFS communications through
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/slab.h>
+#include <linux/sched/signal.h>
+
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
 
-static struct socket *afs_socket; /* my RxRPC socket */
-static struct workqueue_struct *afs_async_calls;
-static atomic_t afs_outstanding_calls;
-static atomic_t afs_outstanding_skbs;
+struct workqueue_struct *afs_async_calls;
 
-static void afs_wake_up_call_waiter(struct afs_call *);
-static int afs_wait_for_call_to_complete(struct afs_call *);
-static void afs_wake_up_async_call(struct afs_call *);
-static int afs_dont_wait_for_call_to_complete(struct afs_call *);
+static void afs_deferred_free_worker(struct work_struct *work);
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_process_async_call(struct work_struct *);
-static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
-static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
-
-/* synchronous call management */
-const struct afs_wait_mode afs_sync_call = {
-	.rx_wakeup	= afs_wake_up_call_waiter,
-	.wait		= afs_wait_for_call_to_complete,
-};
-
-/* asynchronous call management */
-const struct afs_wait_mode afs_async_call = {
-	.rx_wakeup	= afs_wake_up_async_call,
-	.wait		= afs_dont_wait_for_call_to_complete,
-};
-
-/* asynchronous incoming call management */
-static const struct afs_wait_mode afs_async_incoming_call = {
-	.rx_wakeup	= afs_wake_up_async_call,
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID);
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob);
+static int afs_deliver_cm_op_id(struct afs_call *);
+
+static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = {
+	.notify_new_call	= afs_rx_new_call,
+	.discard_new_call	= afs_rx_discard_new_call,
+	.user_attach_call	= afs_rx_attach,
+	.notify_oob		= afs_rx_notify_oob,
 };
 
 /* asynchronous incoming call initial processing */
 static const struct afs_call_type afs_RXCMxxxx = {
 	.name		= "CB.xxxx",
 	.deliver	= afs_deliver_cm_op_id,
-	.abort_to_error	= afs_abort_to_error,
 };
 
-static void afs_collect_incoming_call(struct work_struct *);
-
-static struct sk_buff_head afs_incoming_calls;
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
-
 /*
  * open an RxRPC socket and bind it to be a server for callback notifications
  * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
  */
-int afs_open_socket(void)
+int afs_open_socket(struct afs_net *net)
 {
 	struct sockaddr_rxrpc srx;
 	struct socket *socket;
@@ -70,157 +53,254 @@ int afs_open_socket(void)
 
 	_enter("");
 
-	skb_queue_head_init(&afs_incoming_calls);
-
-	afs_async_calls = create_singlethread_workqueue("kafsd");
-	if (!afs_async_calls) {
-		_leave(" = -ENOMEM [wq]");
-		return -ENOMEM;
-	}
-
-	ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
-	if (ret < 0) {
-		destroy_workqueue(afs_async_calls);
-		_leave(" = %d [socket]", ret);
-		return ret;
-	}
+	ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket);
+	if (ret < 0)
+		goto error_1;
 
 	socket->sk->sk_allocation = GFP_NOFS;
+	socket->sk->sk_user_data = net;
 
 	/* bind the callback manager's address to make this a server socket */
+	memset(&srx, 0, sizeof(srx));
 	srx.srx_family			= AF_RXRPC;
 	srx.srx_service			= CM_SERVICE;
 	srx.transport_type		= SOCK_DGRAM;
-	srx.transport_len		= sizeof(srx.transport.sin);
-	srx.transport.sin.sin_family	= AF_INET;
-	srx.transport.sin.sin_port	= htons(AFS_CM_PORT);
-	memset(&srx.transport.sin.sin_addr, 0,
-	       sizeof(srx.transport.sin.sin_addr));
-
-	ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
-	if (ret < 0) {
-		sock_release(socket);
-		destroy_workqueue(afs_async_calls);
-		_leave(" = %d [bind]", ret);
-		return ret;
+	srx.transport_len		= sizeof(srx.transport.sin6);
+	srx.transport.sin6.sin6_family	= AF_INET6;
+	srx.transport.sin6.sin6_port	= htons(AFS_CM_PORT);
+
+	ret = rxrpc_sock_set_min_security_level(socket->sk,
+						RXRPC_SECURITY_ENCRYPT);
+	if (ret < 0)
+		goto error_2;
+
+	ret = rxrpc_sock_set_manage_response(socket->sk, true);
+	if (ret < 0)
+		goto error_2;
+
+	ret = afs_create_token_key(net, socket);
+	if (ret < 0)
+		pr_err("Couldn't create RxGK CM key: %d\n", ret);
+
+	ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
+	if (ret == -EADDRINUSE) {
+		srx.transport.sin6.sin6_port = 0;
+		ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
 	}
+	if (ret < 0)
+		goto error_2;
+
+	srx.srx_service = YFS_CM_SERVICE;
+	ret = kernel_bind(socket, (struct sockaddr_unsized *) &srx, sizeof(srx));
+	if (ret < 0)
+		goto error_2;
+
+	/* Ideally, we'd turn on service upgrade here, but we can't because
+	 * OpenAFS is buggy and leaks the userStatus field from packet to
+	 * packet and between FS packets and CB packets - so if we try to do an
+	 * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+	 * it sends back to us.
+	 */
 
-	rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
+	rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops);
 
-	afs_socket = socket;
+	ret = kernel_listen(socket, INT_MAX);
+	if (ret < 0)
+		goto error_2;
+
+	net->socket = socket;
+	afs_charge_preallocation(&net->charge_preallocation_work);
 	_leave(" = 0");
 	return 0;
+
+error_2:
+	sock_release(socket);
+error_1:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
  * close the RxRPC socket AFS was using
  */
-void afs_close_socket(void)
+void afs_close_socket(struct afs_net *net)
 {
 	_enter("");
 
-	sock_release(afs_socket);
+	kernel_listen(net->socket, 0);
+	flush_workqueue(afs_async_calls);
 
-	_debug("dework");
-	destroy_workqueue(afs_async_calls);
+	if (net->spare_incoming_call) {
+		afs_put_call(net->spare_incoming_call);
+		net->spare_incoming_call = NULL;
+	}
+
+	_debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
+	wait_var_event(&net->nr_outstanding_calls,
+		       !atomic_read(&net->nr_outstanding_calls));
+	_debug("no outstanding calls");
 
-	ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
-	ASSERTCMP(atomic_read(&afs_outstanding_calls), ==, 0);
+	kernel_sock_shutdown(net->socket, SHUT_RDWR);
+	flush_workqueue(afs_async_calls);
+	net->socket->sk->sk_user_data = NULL;
+	sock_release(net->socket);
+	key_put(net->fs_cm_token_key);
+
+	_debug("dework");
 	_leave("");
 }
 
 /*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Allocate a call.
  */
-static void afs_data_delivered(struct sk_buff *skb)
+static struct afs_call *afs_alloc_call(struct afs_net *net,
+				       const struct afs_call_type *type,
+				       gfp_t gfp)
 {
-	if (!skb) {
-		_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
-		dump_stack();
-	} else {
-		_debug("DLVR %p{%u} [%d]",
-		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-			BUG();
-		rxrpc_kernel_data_delivered(skb);
+	struct afs_call *call;
+	int o;
+
+	call = kzalloc(sizeof(*call), gfp);
+	if (!call)
+		return NULL;
+
+	call->type = type;
+	call->net = net;
+	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	refcount_set(&call->ref, 1);
+	INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call);
+	INIT_WORK(&call->work, call->type->work);
+	INIT_WORK(&call->free_work, afs_deferred_free_worker);
+	init_waitqueue_head(&call->waitq);
+	spin_lock_init(&call->state_lock);
+	call->iter = &call->def_iter;
+
+	o = atomic_inc_return(&net->nr_outstanding_calls);
+	trace_afs_call(call->debug_id, afs_call_trace_alloc, 1, o,
+		       __builtin_return_address(0));
+	return call;
+}
+
+static void afs_free_call(struct afs_call *call)
+{
+	struct afs_net *net = call->net;
+	int o;
+
+	ASSERT(!work_pending(&call->async_work));
+
+	rxrpc_kernel_put_peer(call->peer);
+
+	if (call->rxcall) {
+		rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
+		rxrpc_kernel_put_call(net->socket, call->rxcall);
+		call->rxcall = NULL;
 	}
+	if (call->type->destructor)
+		call->type->destructor(call);
+
+	afs_unuse_server_notime(call->net, call->server, afs_server_trace_unuse_call);
+	kfree(call->request);
+
+	o = atomic_read(&net->nr_outstanding_calls);
+	trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
+		       __builtin_return_address(0));
+	kfree(call);
+
+	o = atomic_dec_return(&net->nr_outstanding_calls);
+	if (o == 0)
+		wake_up_var(&net->nr_outstanding_calls);
 }
 
 /*
- * free a socket buffer
+ * Dispose of a reference on a call.
  */
-static void afs_free_skb(struct sk_buff *skb)
+void afs_put_call(struct afs_call *call)
 {
-	if (!skb) {
-		_debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
-		dump_stack();
-	} else {
-		_debug("FREE %p{%u} [%d]",
-		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-			BUG();
-		rxrpc_kernel_free_skb(skb);
-	}
+	struct afs_net *net = call->net;
+	unsigned int debug_id = call->debug_id;
+	bool zero;
+	int r, o;
+
+	zero = __refcount_dec_and_test(&call->ref, &r);
+	o = atomic_read(&net->nr_outstanding_calls);
+	trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
+		       __builtin_return_address(0));
+	if (zero)
+		afs_free_call(call);
+}
+
+static void afs_deferred_free_worker(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, free_work);
+
+	afs_free_call(call);
 }
 
 /*
- * free a call
+ * Dispose of a reference on a call, deferring the cleanup to a workqueue
+ * to avoid lock recursion.
  */
-static void afs_free_call(struct afs_call *call)
+void afs_deferred_put_call(struct afs_call *call)
 {
-	_debug("DONE %p{%s} [%d]",
-	       call, call->type->name, atomic_read(&afs_outstanding_calls));
-	if (atomic_dec_return(&afs_outstanding_calls) == -1)
-		BUG();
-
-	ASSERTCMP(call->rxcall, ==, NULL);
-	ASSERT(!work_pending(&call->async_work));
-	ASSERT(skb_queue_empty(&call->rx_queue));
-	ASSERT(call->type->name != NULL);
+	struct afs_net *net = call->net;
+	unsigned int debug_id = call->debug_id;
+	bool zero;
+	int r, o;
+
+	zero = __refcount_dec_and_test(&call->ref, &r);
+	o = atomic_read(&net->nr_outstanding_calls);
+	trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
+		       __builtin_return_address(0));
+	if (zero)
+		schedule_work(&call->free_work);
+}
 
-	kfree(call->request);
-	kfree(call);
+/*
+ * Queue the call for actual work.
+ */
+static void afs_queue_call_work(struct afs_call *call)
+{
+	if (call->type->work) {
+		afs_get_call(call, afs_call_trace_work);
+		if (!queue_work(afs_wq, &call->work))
+			afs_put_call(call);
+	}
 }
 
 /*
  * allocate a call with flat request and reply buffers
  */
-struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
-				     size_t request_size, size_t reply_size)
+struct afs_call *afs_alloc_flat_call(struct afs_net *net,
+				     const struct afs_call_type *type,
+				     size_t request_size, size_t reply_max)
 {
 	struct afs_call *call;
 
-	call = kzalloc(sizeof(*call), GFP_NOFS);
+	call = afs_alloc_call(net, type, GFP_NOFS);
 	if (!call)
 		goto nomem_call;
 
-	_debug("CALL %p{%s} [%d]",
-	       call, type->name, atomic_read(&afs_outstanding_calls));
-	atomic_inc(&afs_outstanding_calls);
-
-	call->type = type;
-	call->request_size = request_size;
-	call->reply_max = reply_size;
-
 	if (request_size) {
+		call->request_size = request_size;
 		call->request = kmalloc(request_size, GFP_NOFS);
 		if (!call->request)
 			goto nomem_free;
 	}
 
-	if (reply_size) {
-		call->buffer = kmalloc(reply_size, GFP_NOFS);
+	if (reply_max) {
+		call->reply_max = reply_max;
+		call->buffer = kmalloc(reply_max, GFP_NOFS);
 		if (!call->buffer)
 			goto nomem_free;
 	}
 
+	afs_extract_to_buf(call, call->reply_max);
+	call->operation_ID = type->op;
 	init_waitqueue_head(&call->waitq);
-	skb_queue_head_init(&call->rx_queue);
 	return call;
 
 nomem_free:
-	afs_free_call(call);
+	afs_put_call(call);
 nomem_call:
 	return NULL;
 }
@@ -239,114 +319,79 @@ void afs_flat_call_destructor(struct afs_call *call)
 }
 
 /*
- * attach the data from a bunch of pages on an inode to a call
+ * Advance the AFS call state when the RxRPC call ends the transmit phase.
  */
-static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
-			  struct kvec *iov)
+static void afs_notify_end_request_tx(struct sock *sock,
+				      struct rxrpc_call *rxcall,
+				      unsigned long call_user_ID)
 {
-	struct page *pages[8];
-	unsigned count, n, loop, offset, to;
-	pgoff_t first = call->first, last = call->last;
-	int ret;
-
-	_enter("");
+	struct afs_call *call = (struct afs_call *)call_user_ID;
 
-	offset = call->first_offset;
-	call->first_offset = 0;
-
-	do {
-		_debug("attach %lx-%lx", first, last);
-
-		count = last - first + 1;
-		if (count > ARRAY_SIZE(pages))
-			count = ARRAY_SIZE(pages);
-		n = find_get_pages_contig(call->mapping, first, count, pages);
-		ASSERTCMP(n, ==, count);
-
-		loop = 0;
-		do {
-			msg->msg_flags = 0;
-			to = PAGE_SIZE;
-			if (first + loop >= last)
-				to = call->last_to;
-			else
-				msg->msg_flags = MSG_MORE;
-			iov->iov_base = kmap(pages[loop]) + offset;
-			iov->iov_len = to - offset;
-			offset = 0;
-
-			_debug("- range %u-%u%s",
-			       offset, to, msg->msg_flags ? " [more]" : "");
-			msg->msg_iov = (struct iovec *) iov;
-			msg->msg_iovlen = 1;
-
-			/* have to change the state *before* sending the last
-			 * packet as RxRPC might give us the reply before it
-			 * returns from sending the request */
-			if (first + loop >= last)
-				call->state = AFS_CALL_AWAIT_REPLY;
-			ret = rxrpc_kernel_send_data(call->rxcall, msg,
-						     to - offset);
-			kunmap(pages[loop]);
-			if (ret < 0)
-				break;
-		} while (++loop < count);
-		first += count;
-
-		for (loop = 0; loop < count; loop++)
-			put_page(pages[loop]);
-		if (ret < 0)
-			break;
-	} while (first <= last);
-
-	_leave(" = %d", ret);
-	return ret;
+	afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
 }
 
 /*
- * initiate a call
+ * Initiate a call and synchronously queue up the parameters for dispatch.  Any
+ * error is stored into the call struct, which the caller must check for.
  */
-int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
-		  const struct afs_wait_mode *wait_mode)
+void afs_make_call(struct afs_call *call, gfp_t gfp)
 {
-	struct sockaddr_rxrpc srx;
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
+	size_t len;
+	s64 tx_total_len;
 	int ret;
-	struct sk_buff *skb;
 
-	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
+	_enter(",{%pISp+%u},", rxrpc_kernel_remote_addr(call->peer), call->service_id);
 
 	ASSERT(call->type != NULL);
 	ASSERT(call->type->name != NULL);
 
 	_debug("____MAKE %p{%s,%x} [%d]____",
 	       call, call->type->name, key_serial(call->key),
-	       atomic_read(&afs_outstanding_calls));
-
-	call->wait_mode = wait_mode;
-	INIT_WORK(&call->async_work, afs_process_async_call);
-
-	memset(&srx, 0, sizeof(srx));
-	srx.srx_family = AF_RXRPC;
-	srx.srx_service = call->service_id;
-	srx.transport_type = SOCK_DGRAM;
-	srx.transport_len = sizeof(srx.transport.sin);
-	srx.transport.sin.sin_family = AF_INET;
-	srx.transport.sin.sin_port = call->port;
-	memcpy(&srx.transport.sin.sin_addr, addr, 4);
+	       atomic_read(&call->net->nr_outstanding_calls));
+
+	trace_afs_make_call(call);
+
+	/* Work out the length we're going to transmit.  This is awkward for
+	 * calls such as FS.StoreData where there's an extra injection of data
+	 * after the initial fixed part.
+	 */
+	tx_total_len = call->request_size;
+	if (call->write_iter)
+		tx_total_len += iov_iter_count(call->write_iter);
+
+	/* If the call is going to be asynchronous, we need an extra ref for
+	 * the call to hold itself so the caller need not hang on to its ref.
+	 */
+	if (call->async) {
+		afs_get_call(call, afs_call_trace_get);
+		call->drop_ref = true;
+	}
 
 	/* create a call */
-	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
-					 (unsigned long) call, gfp);
-	call->key = NULL;
+	rxcall = rxrpc_kernel_begin_call(call->net->socket, call->peer, call->key,
+					 (unsigned long)call,
+					 tx_total_len,
+					 call->max_lifespan,
+					 gfp,
+					 (call->async ?
+					  afs_wake_up_async_call :
+					  afs_wake_up_call_waiter),
+					 call->service_id,
+					 call->upgrade,
+					 (call->intr ? RXRPC_PREINTERRUPTIBLE :
+					  RXRPC_UNINTERRUPTIBLE),
+					 call->debug_id);
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
+		call->error = ret;
 		goto error_kill_call;
 	}
 
 	call->rxcall = rxcall;
+	call->issue_time = ktime_get_real();
 
 	/* send the request */
 	iov[0].iov_base	= call->request;
@@ -354,402 +399,448 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= (struct iovec *) iov;
-	msg.msg_iovlen		= 1;
+	iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, call->request_size);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
-	msg.msg_flags		= (call->send_pages ? MSG_MORE : 0);
-
-	/* have to change the state *before* sending the last packet as RxRPC
-	 * might give us the reply before it returns from sending the
-	 * request */
-	if (!call->send_pages)
-		call->state = AFS_CALL_AWAIT_REPLY;
-	ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+	msg.msg_flags		= MSG_WAITALL | (call->write_iter ? MSG_MORE : 0);
+
+	ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
+				     &msg, call->request_size,
+				     afs_notify_end_request_tx);
 	if (ret < 0)
 		goto error_do_abort;
 
-	if (call->send_pages) {
-		ret = afs_send_pages(call, &msg, iov);
+	if (call->write_iter) {
+		msg.msg_iter = *call->write_iter;
+		msg.msg_flags &= ~MSG_MORE;
+		trace_afs_send_data(call, &msg);
+
+		ret = rxrpc_kernel_send_data(call->net->socket,
+					     call->rxcall, &msg,
+					     iov_iter_count(&msg.msg_iter),
+					     afs_notify_end_request_tx);
+		*call->write_iter = msg.msg_iter;
+
+		trace_afs_sent_data(call, &msg, ret);
 		if (ret < 0)
 			goto error_do_abort;
 	}
 
-	/* at this point, an async call may no longer exist as it may have
-	 * already completed */
-	return wait_mode->wait(call);
+	/* Note that at this point, we may have received the reply or an abort
+	 * - and an asynchronous call may already have completed.
+	 *
+	 * afs_wait_for_call_to_complete(call)
+	 * must be called to synchronously clean up.
+	 */
+	return;
 
 error_do_abort:
-	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
-	while ((skb = skb_dequeue(&call->rx_queue)))
-		afs_free_skb(skb);
-	rxrpc_kernel_end_call(rxcall);
-	call->rxcall = NULL;
+	if (ret != -ECONNABORTED)
+		rxrpc_kernel_abort_call(call->net->socket, rxcall,
+					RX_USER_ABORT, ret,
+					afs_abort_send_data_error);
+	if (call->async) {
+		afs_see_call(call, afs_call_trace_async_abort);
+		return;
+	}
+
+	if (ret == -ECONNABORTED) {
+		len = 0;
+		iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
+		rxrpc_kernel_recv_data(call->net->socket, rxcall,
+				       &msg.msg_iter, &len, false,
+				       &call->abort_code, &call->service_id);
+		call->responded = true;
+	}
+	call->error = ret;
+	trace_afs_call_done(call);
 error_kill_call:
-	call->type->destructor(call);
-	afs_free_call(call);
+	if (call->async)
+		afs_see_call(call, afs_call_trace_async_kill);
+	if (call->type->immediate_cancel)
+		call->type->immediate_cancel(call);
+
+	/* We need to dispose of the extra ref we grabbed for an async call.
+	 * The call, however, might be queued on afs_async_calls and we need to
+	 * make sure we don't get any more notifications that might requeue it.
+	 */
+	if (call->rxcall)
+		rxrpc_kernel_shutdown_call(call->net->socket, call->rxcall);
+	if (call->async) {
+		if (cancel_work_sync(&call->async_work))
+			afs_put_call(call);
+		afs_set_call_complete(call, ret, 0);
+	}
+
+	call->error = ret;
+	call->state = AFS_CALL_COMPLETE;
 	_leave(" = %d", ret);
-	return ret;
 }
 
 /*
- * handles intercepted messages that were arriving in the socket's Rx queue
- * - called with the socket receive queue lock held to ensure message ordering
- * - called with softirqs disabled
+ * Log remote abort codes that indicate that we have a protocol disagreement
+ * with the server.
  */
-static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
-			       struct sk_buff *skb)
+static void afs_log_error(struct afs_call *call, s32 remote_abort)
 {
-	struct afs_call *call = (struct afs_call *) user_call_ID;
-
-	_enter("%p,,%u", call, skb->mark);
-
-	_debug("ICPT %p{%u} [%d]",
-	       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-
-	ASSERTCMP(sk, ==, afs_socket->sk);
-	atomic_inc(&afs_outstanding_skbs);
-
-	if (!call) {
-		/* its an incoming call for our callback service */
-		skb_queue_tail(&afs_incoming_calls, skb);
-		queue_work(afs_wq, &afs_collect_incoming_call_work);
-	} else {
-		/* route the messages directly to the appropriate call */
-		skb_queue_tail(&call->rx_queue, skb);
-		call->wait_mode->rx_wakeup(call);
+	static int max = 0;
+	const char *msg;
+	int m;
+
+	switch (remote_abort) {
+	case RX_EOF:		 msg = "unexpected EOF";	break;
+	case RXGEN_CC_MARSHAL:	 msg = "client marshalling";	break;
+	case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling";	break;
+	case RXGEN_SS_MARSHAL:	 msg = "server marshalling";	break;
+	case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling";	break;
+	case RXGEN_DECODE:	 msg = "opcode decode";		break;
+	case RXGEN_SS_XDRFREE:	 msg = "server XDR cleanup";	break;
+	case RXGEN_CC_XDRFREE:	 msg = "client XDR cleanup";	break;
+	case -32:		 msg = "insufficient data";	break;
+	default:
+		return;
 	}
 
-	_leave("");
+	m = max;
+	if (m < 3) {
+		max = m + 1;
+		pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n",
+			  msg, call->type->name,
+			  rxrpc_kernel_remote_addr(call->peer));
+	}
 }
 
 /*
  * deliver messages to a call
  */
-static void afs_deliver_to_call(struct afs_call *call)
+void afs_deliver_to_call(struct afs_call *call)
 {
-	struct sk_buff *skb;
-	bool last;
-	u32 abort_code;
+	enum afs_call_state state;
+	size_t len;
+	u32 abort_code, remote_abort = 0;
 	int ret;
 
-	_enter("");
-
-	while ((call->state == AFS_CALL_AWAIT_REPLY ||
-		call->state == AFS_CALL_AWAIT_OP_ID ||
-		call->state == AFS_CALL_AWAIT_REQUEST ||
-		call->state == AFS_CALL_AWAIT_ACK) &&
-	       (skb = skb_dequeue(&call->rx_queue))) {
-		switch (skb->mark) {
-		case RXRPC_SKB_MARK_DATA:
-			_debug("Rcv DATA");
-			last = rxrpc_kernel_is_data_last(skb);
-			ret = call->type->deliver(call, skb, last);
-			switch (ret) {
-			case 0:
-				if (last &&
-				    call->state == AFS_CALL_AWAIT_REPLY)
-					call->state = AFS_CALL_COMPLETE;
-				break;
-			case -ENOTCONN:
-				abort_code = RX_CALL_DEAD;
-				goto do_abort;
-			case -ENOTSUPP:
-				abort_code = RX_INVALID_OPERATION;
-				goto do_abort;
-			default:
-				abort_code = RXGEN_CC_UNMARSHAL;
-				if (call->state != AFS_CALL_AWAIT_REPLY)
-					abort_code = RXGEN_SS_UNMARSHAL;
-			do_abort:
-				rxrpc_kernel_abort_call(call->rxcall,
-							abort_code);
-				call->error = ret;
-				call->state = AFS_CALL_ERROR;
-				break;
+	_enter("%s", call->type->name);
+
+	while (state = READ_ONCE(call->state),
+	       state == AFS_CALL_CL_AWAIT_REPLY ||
+	       state == AFS_CALL_SV_AWAIT_OP_ID ||
+	       state == AFS_CALL_SV_AWAIT_REQUEST ||
+	       state == AFS_CALL_SV_AWAIT_ACK
+	       ) {
+		if (state == AFS_CALL_SV_AWAIT_ACK) {
+			len = 0;
+			iov_iter_kvec(&call->def_iter, ITER_DEST, NULL, 0, 0);
+			ret = rxrpc_kernel_recv_data(call->net->socket,
+						     call->rxcall, &call->def_iter,
+						     &len, false, &remote_abort,
+						     &call->service_id);
+			trace_afs_receive_data(call, &call->def_iter, false, ret);
+
+			if (ret == -EINPROGRESS || ret == -EAGAIN)
+				return;
+			if (ret < 0 || ret == 1) {
+				if (ret == 1)
+					ret = 0;
+				goto call_complete;
 			}
-			afs_data_delivered(skb);
-			skb = NULL;
-			continue;
-		case RXRPC_SKB_MARK_FINAL_ACK:
-			_debug("Rcv ACK");
-			call->state = AFS_CALL_COMPLETE;
-			break;
-		case RXRPC_SKB_MARK_BUSY:
-			_debug("Rcv BUSY");
-			call->error = -EBUSY;
-			call->state = AFS_CALL_BUSY;
-			break;
-		case RXRPC_SKB_MARK_REMOTE_ABORT:
-			abort_code = rxrpc_kernel_get_abort_code(skb);
-			call->error = call->type->abort_to_error(abort_code);
-			call->state = AFS_CALL_ABORTED;
-			_debug("Rcv ABORT %u -> %d", abort_code, call->error);
-			break;
-		case RXRPC_SKB_MARK_NET_ERROR:
-			call->error = -rxrpc_kernel_get_error_number(skb);
-			call->state = AFS_CALL_ERROR;
-			_debug("Rcv NET ERROR %d", call->error);
-			break;
-		case RXRPC_SKB_MARK_LOCAL_ERROR:
-			call->error = -rxrpc_kernel_get_error_number(skb);
-			call->state = AFS_CALL_ERROR;
-			_debug("Rcv LOCAL ERROR %d", call->error);
-			break;
-		default:
-			BUG();
-			break;
+			return;
 		}
 
-		afs_free_skb(skb);
-	}
-
-	/* make sure the queue is empty if the call is done with (we might have
-	 * aborted the call early because of an unmarshalling error) */
-	if (call->state >= AFS_CALL_COMPLETE) {
-		while ((skb = skb_dequeue(&call->rx_queue)))
-			afs_free_skb(skb);
-		if (call->incoming) {
-			rxrpc_kernel_end_call(call->rxcall);
-			call->rxcall = NULL;
-			call->type->destructor(call);
-			afs_free_call(call);
+		ret = call->type->deliver(call);
+		state = READ_ONCE(call->state);
+		if (ret == 0 && call->unmarshalling_error)
+			ret = -EBADMSG;
+		switch (ret) {
+		case 0:
+			call->responded = true;
+			afs_queue_call_work(call);
+			if (state == AFS_CALL_CL_PROC_REPLY) {
+				if (call->op)
+					set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
+						&call->op->server->flags);
+				goto call_complete;
+			}
+			ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
+			goto done;
+		case -EINPROGRESS:
+		case -EAGAIN:
+			goto out;
+		case -ECONNABORTED:
+			ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+			call->responded = true;
+			afs_log_error(call, call->abort_code);
+			goto done;
+		case -ENOTSUPP:
+			call->responded = true;
+			abort_code = RXGEN_OPCODE;
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+						abort_code, ret,
+						afs_abort_op_not_supported);
+			goto local_abort;
+		case -EIO:
+			pr_err("kAFS: Call %u in bad state %u\n",
+			       call->debug_id, state);
+			fallthrough;
+		case -ENODATA:
+		case -EBADMSG:
+		case -EMSGSIZE:
+		case -ENOMEM:
+		case -EFAULT:
+			abort_code = RXGEN_CC_UNMARSHAL;
+			if (state != AFS_CALL_CL_AWAIT_REPLY)
+				abort_code = RXGEN_SS_UNMARSHAL;
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+						abort_code, ret,
+						afs_abort_unmarshal_error);
+			goto local_abort;
+		default:
+			abort_code = RX_CALL_DEAD;
+			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+						abort_code, ret,
+						afs_abort_general_error);
+			goto local_abort;
 		}
 	}
 
+done:
+	if (call->type->done)
+		call->type->done(call);
+out:
 	_leave("");
+	return;
+
+local_abort:
+	abort_code = 0;
+call_complete:
+	afs_set_call_complete(call, ret, remote_abort);
+	goto done;
 }
 
 /*
- * wait synchronously for a call to complete
+ * Wait synchronously for a call to complete.
  */
-static int afs_wait_for_call_to_complete(struct afs_call *call)
+void afs_wait_for_call_to_complete(struct afs_call *call)
 {
-	struct sk_buff *skb;
-	int ret;
-
-	DECLARE_WAITQUEUE(myself, current);
+	bool rxrpc_complete = false;
 
 	_enter("");
 
-	add_wait_queue(&call->waitq, &myself);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
+	if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
+		DECLARE_WAITQUEUE(myself, current);
 
-		/* deliver any messages that are in the queue */
-		if (!skb_queue_empty(&call->rx_queue)) {
-			__set_current_state(TASK_RUNNING);
-			afs_deliver_to_call(call);
-			continue;
-		}
+		add_wait_queue(&call->waitq, &myself);
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
 
-		ret = call->error;
-		if (call->state >= AFS_CALL_COMPLETE)
-			break;
-		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-		schedule();
-	}
+			/* deliver any messages that are in the queue */
+			if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+			    call->need_attention) {
+				call->need_attention = false;
+				__set_current_state(TASK_RUNNING);
+				afs_deliver_to_call(call);
+				continue;
+			}
+
+			if (afs_check_call_state(call, AFS_CALL_COMPLETE))
+				break;
 
-	remove_wait_queue(&call->waitq, &myself);
-	__set_current_state(TASK_RUNNING);
+			if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
+				/* rxrpc terminated the call. */
+				rxrpc_complete = true;
+				break;
+			}
+
+			schedule();
+		}
 
-	/* kill the call */
-	if (call->state < AFS_CALL_COMPLETE) {
-		_debug("call incomplete");
-		rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
-		while ((skb = skb_dequeue(&call->rx_queue)))
-			afs_free_skb(skb);
+		remove_wait_queue(&call->waitq, &myself);
+		__set_current_state(TASK_RUNNING);
 	}
 
-	_debug("call complete");
-	rxrpc_kernel_end_call(call->rxcall);
-	call->rxcall = NULL;
-	call->type->destructor(call);
-	afs_free_call(call);
-	_leave(" = %d", ret);
-	return ret;
+	if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
+		if (rxrpc_complete) {
+			afs_set_call_complete(call, call->error, call->abort_code);
+		} else {
+			/* Kill off the call if it's still live. */
+			_debug("call interrupted");
+			if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+						    RX_USER_ABORT, -EINTR,
+						    afs_abort_interrupted))
+				afs_set_call_complete(call, -EINTR, 0);
+		}
+	}
 }
 
 /*
  * wake up a waiting call
  */
-static void afs_wake_up_call_waiter(struct afs_call *call)
-{
-	wake_up(&call->waitq);
-}
-
-/*
- * wake up an asynchronous call
- */
-static void afs_wake_up_async_call(struct afs_call *call)
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
 {
-	_enter("");
-	queue_work(afs_async_calls, &call->async_work);
-}
+	struct afs_call *call = (struct afs_call *)call_user_ID;
 
-/*
- * put a call into asynchronous mode
- * - mustn't touch the call descriptor as the call my have completed by the
- *   time we get here
- */
-static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
-{
-	_enter("");
-	return -EINPROGRESS;
+	call->need_attention = true;
+	wake_up(&call->waitq);
 }
 
 /*
- * delete an asynchronous call
+ * Wake up an asynchronous call.  The caller is holding the call notify
+ * spinlock around this, so we can't call afs_put_call().
  */
-static void afs_delete_async_call(struct work_struct *work)
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+				   unsigned long call_user_ID)
 {
-	struct afs_call *call =
-		container_of(work, struct afs_call, async_work);
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+	int r;
 
-	_enter("");
+	trace_afs_notify_call(rxcall, call);
+	call->need_attention = true;
 
-	afs_free_call(call);
+	if (__refcount_inc_not_zero(&call->ref, &r)) {
+		trace_afs_call(call->debug_id, afs_call_trace_wake, r + 1,
+			       atomic_read(&call->net->nr_outstanding_calls),
+			       __builtin_return_address(0));
 
-	_leave("");
+		if (!queue_work(afs_async_calls, &call->async_work))
+			afs_deferred_put_call(call);
+	}
 }
 
 /*
- * perform processing on an asynchronous call
- * - on a multiple-thread workqueue this work item may try to run on several
- *   CPUs at the same time
+ * Perform I/O processing on an asynchronous call.  The work item carries a ref
+ * to the call struct that we either need to release or to pass on.
  */
 static void afs_process_async_call(struct work_struct *work)
 {
-	struct afs_call *call =
-		container_of(work, struct afs_call, async_work);
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
 
 	_enter("");
 
-	if (!skb_queue_empty(&call->rx_queue))
+	if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+		call->need_attention = false;
 		afs_deliver_to_call(call);
-
-	if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
-		if (call->wait_mode->async_complete)
-			call->wait_mode->async_complete(call->reply,
-							call->error);
-		call->reply = NULL;
-
-		/* kill the call */
-		rxrpc_kernel_end_call(call->rxcall);
-		call->rxcall = NULL;
-		if (call->type->destructor)
-			call->type->destructor(call);
-
-		/* we can't just delete the call because the work item may be
-		 * queued */
-		PREPARE_WORK(&call->async_work, afs_delete_async_call);
-		queue_work(afs_async_calls, &call->async_work);
 	}
 
+	afs_put_call(call);
 	_leave("");
 }
 
-/*
- * empty a socket buffer into a flat reply buffer
- */
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
 {
-	size_t len = skb->len;
+	struct afs_call *call = (struct afs_call *)user_call_ID;
 
-	if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
-		BUG();
-	call->reply_size += len;
+	call->rxcall = rxcall;
 }
 
 /*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
  */
-static void afs_collect_incoming_call(struct work_struct *work)
+void afs_charge_preallocation(struct work_struct *work)
 {
-	struct rxrpc_call *rxcall;
-	struct afs_call *call = NULL;
-	struct sk_buff *skb;
-
-	while ((skb = skb_dequeue(&afs_incoming_calls))) {
-		_debug("new call");
-
-		/* don't need the notification */
-		afs_free_skb(skb);
+	struct afs_net *net =
+		container_of(work, struct afs_net, charge_preallocation_work);
+	struct afs_call *call = net->spare_incoming_call;
 
+	for (;;) {
 		if (!call) {
-			call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
-			if (!call) {
-				rxrpc_kernel_reject_call(afs_socket);
-				return;
-			}
+			call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL);
+			if (!call)
+				break;
 
-			INIT_WORK(&call->async_work, afs_process_async_call);
-			call->wait_mode = &afs_async_incoming_call;
-			call->type = &afs_RXCMxxxx;
+			call->drop_ref = true;
+			call->async = true;
+			call->state = AFS_CALL_SV_AWAIT_OP_ID;
 			init_waitqueue_head(&call->waitq);
-			skb_queue_head_init(&call->rx_queue);
-			call->state = AFS_CALL_AWAIT_OP_ID;
-
-			_debug("CALL %p{%s} [%d]",
-			       call, call->type->name,
-			       atomic_read(&afs_outstanding_calls));
-			atomic_inc(&afs_outstanding_calls);
+			afs_extract_to_tmp(call);
 		}
 
-		rxcall = rxrpc_kernel_accept_call(afs_socket,
-						  (unsigned long) call);
-		if (!IS_ERR(rxcall)) {
-			call->rxcall = rxcall;
-			call = NULL;
-		}
+		if (rxrpc_kernel_charge_accept(net->socket,
+					       afs_wake_up_async_call,
+					       (unsigned long)call,
+					       GFP_KERNEL,
+					       call->debug_id) < 0)
+			break;
+		call = NULL;
 	}
+	net->spare_incoming_call = call;
+}
 
-	if (call)
-		afs_free_call(call);
+/*
+ * Discard a preallocated call when a socket is shut down.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+				    unsigned long user_call_ID)
+{
+	struct afs_call *call = (struct afs_call *)user_call_ID;
+
+	call->rxcall = NULL;
+	afs_put_call(call);
 }
 
 /*
- * grab the operation ID from an incoming cache manager call
+ * Notification of an incoming call.
  */
-static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
-				bool last)
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+			    unsigned long user_call_ID)
 {
-	size_t len = skb->len;
-	void *oibuf = (void *) &call->operation_ID;
+	struct afs_call *call = (struct afs_call *)user_call_ID;
+	struct afs_net *net = afs_sock2net(sk);
 
-	_enter("{%u},{%zu},%d", call->offset, len, last);
+	call->peer = rxrpc_kernel_get_call_peer(sk->sk_socket, call->rxcall);
+	call->server = afs_find_server(call->peer);
+	if (!call->server)
+		trace_afs_cm_no_server(call, rxrpc_kernel_remote_srx(call->peer));
+
+	queue_work(afs_wq, &net->charge_preallocation_work);
+}
 
-	ASSERTCMP(call->offset, <, 4);
+/*
+ * Grab the operation ID from an incoming cache manager call.  The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
+ */
+static int afs_deliver_cm_op_id(struct afs_call *call)
+{
+	int ret;
+
+	_enter("{%zu}", iov_iter_count(call->iter));
 
 	/* the operation ID forms the first four bytes of the request data */
-	len = min_t(size_t, len, 4 - call->offset);
-	if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
-		BUG();
-	if (!pskb_pull(skb, len))
-		BUG();
-	call->offset += len;
-
-	if (call->offset < 4) {
-		if (last) {
-			_leave(" = -EBADMSG [op ID short]");
-			return -EBADMSG;
-		}
-		_leave(" = 0 [incomplete]");
-		return 0;
-	}
+	ret = afs_extract_data(call, true);
+	if (ret < 0)
+		return ret;
 
-	call->state = AFS_CALL_AWAIT_REQUEST;
+	call->operation_ID = ntohl(call->tmp);
+	afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
 
 	/* ask the cache manager to route the call (it'll change the call type
 	 * if successful) */
 	if (!afs_cm_incoming_call(call))
 		return -ENOTSUPP;
 
-	/* pass responsibility for the remainer of this message off to the
+	call->security_ix = rxrpc_kernel_query_call_security(call->rxcall,
+							     &call->service_id,
+							     &call->enctype);
+
+	trace_afs_cb_call(call);
+	call->work.func = call->type->work;
+
+	/* pass responsibility for the remainder of this message off to the
 	 * cache manager op */
-	return call->type->deliver(call, skb, last);
+	return call->type->deliver(call);
+}
+
+/*
+ * Advance the AFS call state when an RxRPC service call ends the transmit
+ * phase.
+ */
+static void afs_notify_end_reply_tx(struct sock *sock,
+				    struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
+{
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+
+	afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK);
 }
 
 /*
@@ -757,35 +848,33 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
  */
 void afs_send_empty_reply(struct afs_call *call)
 {
+	struct afs_net *net = call->net;
 	struct msghdr msg;
-	struct iovec iov[1];
 
 	_enter("");
 
-	iov[0].iov_base		= NULL;
-	iov[0].iov_len		= 0;
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0);
+
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= iov;
-	msg.msg_iovlen		= 0;
+	iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, NULL, 0, 0);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
-	switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+	switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
+				       afs_notify_end_reply_tx)) {
 	case 0:
 		_leave(" [replied]");
 		return;
 
 	case -ENOMEM:
 		_debug("oom");
-		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+		rxrpc_kernel_abort_call(net->socket, call->rxcall,
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					afs_abort_oom);
+		fallthrough;
 	default:
-		rxrpc_kernel_end_call(call->rxcall);
-		call->rxcall = NULL;
-		call->type->destructor(call);
-		afs_free_call(call);
 		_leave(" [error]");
 		return;
 	}
@@ -796,64 +885,102 @@ void afs_send_empty_reply(struct afs_call *call)
  */
 void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 {
+	struct afs_net *net = call->net;
 	struct msghdr msg;
-	struct iovec iov[1];
+	struct kvec iov[1];
 	int n;
 
 	_enter("");
 
+	rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len);
+
 	iov[0].iov_base		= (void *) buf;
 	iov[0].iov_len		= len;
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= iov;
-	msg.msg_iovlen		= 1;
+	iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, len);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
 
-	call->state = AFS_CALL_AWAIT_ACK;
-	n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
+	n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
+				   afs_notify_end_reply_tx);
 	if (n >= 0) {
+		/* Success */
 		_leave(" [replied]");
 		return;
 	}
+
 	if (n == -ENOMEM) {
 		_debug("oom");
-		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+		rxrpc_kernel_abort_call(net->socket, call->rxcall,
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					afs_abort_oom);
 	}
-	rxrpc_kernel_end_call(call->rxcall);
-	call->rxcall = NULL;
-	call->type->destructor(call);
-	afs_free_call(call);
 	_leave(" [error]");
 }
 
 /*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
-		     bool last, void *buf, size_t count)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
-	size_t len = skb->len;
-
-	_enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+	struct afs_net *net = call->net;
+	struct iov_iter *iter = call->iter;
+	enum afs_call_state state;
+	u32 remote_abort = 0;
+	int ret;
 
-	ASSERTCMP(call->offset, <, count);
+	_enter("{%s,%zu,%zu},%d",
+	       call->type->name, call->iov_len, iov_iter_count(iter), want_more);
 
-	len = min_t(size_t, len, count - call->offset);
-	if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
-	    !pskb_pull(skb, len))
-		BUG();
-	call->offset += len;
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
+				     &call->iov_len, want_more, &remote_abort,
+				     &call->service_id);
+	trace_afs_receive_data(call, call->iter, want_more, ret);
+	if (ret == 0 || ret == -EAGAIN)
+		return ret;
 
-	if (call->offset < count) {
-		if (last) {
-			_leave(" = -EBADMSG [%d < %zu]", call->offset, count);
-			return -EBADMSG;
+	state = READ_ONCE(call->state);
+	if (ret == 1) {
+		switch (state) {
+		case AFS_CALL_CL_AWAIT_REPLY:
+			afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY);
+			break;
+		case AFS_CALL_SV_AWAIT_REQUEST:
+			afs_set_call_state(call, state, AFS_CALL_SV_REPLYING);
+			break;
+		case AFS_CALL_COMPLETE:
+			kdebug("prem complete %d", call->error);
+			return afs_io_error(call, afs_io_error_extract);
+		default:
+			break;
 		}
-		_leave(" = -EAGAIN");
-		return -EAGAIN;
+		return 0;
 	}
-	return 0;
+
+	afs_set_call_complete(call, ret, remote_abort);
+	return ret;
+}
+
+/*
+ * Log protocol error production.
+ */
+noinline int afs_protocol_error(struct afs_call *call,
+				enum afs_eproto_cause cause)
+{
+	trace_afs_protocol_error(call, cause);
+	if (call)
+		call->unmarshalling_error = true;
+	return -EBADMSG;
+}
+
+/*
+ * Wake up OOB notification processing.
+ */
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob)
+{
+	struct afs_net *net = sk->sk_user_data;
+
+	schedule_work(&net->rx_oob_work);
 }
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..55ddce94af03 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS security handling
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/init.h>
@@ -14,27 +10,63 @@
 #include <linux/fs.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
+#include <linux/hashtable.h>
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
+static DEFINE_HASHTABLE(afs_permits_cache, 10);
+static DEFINE_SPINLOCK(afs_permits_lock);
+static DEFINE_MUTEX(afs_key_lock);
+
+/*
+ * Allocate a key to use as a placeholder for anonymous user security.
+ */
+static int afs_alloc_anon_key(struct afs_cell *cell)
+{
+	struct key *key;
+
+	mutex_lock(&afs_key_lock);
+	key = cell->anonymous_key;
+	if (!key) {
+		key = rxrpc_get_null_key(cell->key_desc);
+		if (!IS_ERR(key))
+			cell->anonymous_key = key;
+	}
+	mutex_unlock(&afs_key_lock);
+
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	_debug("anon key %p{%x}",
+	       cell->anonymous_key, key_serial(cell->anonymous_key));
+	return 0;
+}
+
 /*
  * get a key
  */
 struct key *afs_request_key(struct afs_cell *cell)
 {
 	struct key *key;
+	int ret;
 
-	_enter("{%x}", key_serial(cell->anonymous_key));
+	_enter("{%s}", cell->key_desc);
 
-	_debug("key %s", cell->anonymous_key->description);
-	key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
-			  NULL);
+	_debug("key %s", cell->key_desc);
+	key = request_key_net(&key_type_rxrpc, cell->key_desc,
+			      cell->net->net, NULL);
 	if (IS_ERR(key)) {
 		if (PTR_ERR(key) != -ENOKEY) {
 			_leave(" = %ld", PTR_ERR(key));
 			return key;
 		}
 
+		if (!cell->anonymous_key) {
+			ret = afs_alloc_anon_key(cell);
+			if (ret < 0)
+				return ERR_PTR(ret);
+		}
+
 		/* act as anonymous user */
 		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
 		return key_get(cell->anonymous_key);
@@ -46,167 +78,293 @@ struct key *afs_request_key(struct afs_cell *cell)
 }
 
 /*
- * dispose of a permits list
+ * Get a key when pathwalk is in rcuwalk mode.
  */
-void afs_zap_permits(struct rcu_head *rcu)
+struct key *afs_request_key_rcu(struct afs_cell *cell)
 {
-	struct afs_permits *permits =
-		container_of(rcu, struct afs_permits, rcu);
-	int loop;
+	struct key *key;
 
-	_enter("{%d}", permits->count);
+	_enter("{%s}", cell->key_desc);
 
-	for (loop = permits->count - 1; loop >= 0; loop--)
-		key_put(permits->permits[loop].key);
-	kfree(permits);
+	_debug("key %s", cell->key_desc);
+	key = request_key_net_rcu(&key_type_rxrpc, cell->key_desc,
+				  cell->net->net);
+	if (IS_ERR(key)) {
+		if (PTR_ERR(key) != -ENOKEY) {
+			_leave(" = %ld", PTR_ERR(key));
+			return key;
+		}
+
+		/* act as anonymous user */
+		if (!cell->anonymous_key)
+			return NULL; /* Need to allocate */
+		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+		return key_get(cell->anonymous_key);
+	} else {
+		/* act as authorised user */
+		_leave(" = {%x} [auth]", key_serial(key));
+		return key;
+	}
 }
 
 /*
- * dispose of a permits list in which all the key pointers have been copied
+ * Dispose of a list of permits.
  */
-static void afs_dispose_of_permits(struct rcu_head *rcu)
+static void afs_permits_rcu(struct rcu_head *rcu)
 {
 	struct afs_permits *permits =
 		container_of(rcu, struct afs_permits, rcu);
+	int i;
 
-	_enter("{%d}", permits->count);
-
+	for (i = 0; i < permits->nr_permits; i++)
+		key_put(permits->permits[i].key);
 	kfree(permits);
 }
 
 /*
- * get the authorising vnode - this is the specified inode itself if it's a
- * directory or it's the parent directory if the specified inode is a file or
- * symlink
- * - the caller must release the ref on the inode
+ * Discard a permission cache.
  */
-static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
-					    struct key *key)
+void afs_put_permits(struct afs_permits *permits)
 {
-	struct afs_vnode *auth_vnode;
-	struct inode *auth_inode;
-
-	_enter("");
-
-	if (S_ISDIR(vnode->vfs_inode.i_mode)) {
-		auth_inode = igrab(&vnode->vfs_inode);
-		ASSERT(auth_inode != NULL);
-	} else {
-		auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
-				      &vnode->status.parent, NULL, NULL);
-		if (IS_ERR(auth_inode))
-			return ERR_CAST(auth_inode);
+	if (permits && refcount_dec_and_test(&permits->usage)) {
+		spin_lock(&afs_permits_lock);
+		hash_del_rcu(&permits->hash_node);
+		spin_unlock(&afs_permits_lock);
+		call_rcu(&permits->rcu, afs_permits_rcu);
 	}
-
-	auth_vnode = AFS_FS_I(auth_inode);
-	_leave(" = {%x}", auth_vnode->fid.vnode);
-	return auth_vnode;
 }
 
 /*
- * clear the permit cache on a directory vnode
+ * Clear a permit cache on callback break.
  */
 void afs_clear_permits(struct afs_vnode *vnode)
 {
 	struct afs_permits *permits;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	spin_lock(&vnode->lock);
+	permits = rcu_dereference_protected(vnode->permit_cache,
+					    lockdep_is_held(&vnode->lock));
+	RCU_INIT_POINTER(vnode->permit_cache, NULL);
+	spin_unlock(&vnode->lock);
+
+	afs_put_permits(permits);
+}
+
+/*
+ * Hash a list of permits.  Use simple addition to make it easy to add an extra
+ * one at an as-yet indeterminate position in the list.
+ */
+static void afs_hash_permits(struct afs_permits *permits)
+{
+	unsigned long h = permits->nr_permits;
+	int i;
 
-	mutex_lock(&vnode->permits_lock);
-	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
-	mutex_unlock(&vnode->permits_lock);
+	for (i = 0; i < permits->nr_permits; i++) {
+		h += (unsigned long)permits->permits[i].key / sizeof(void *);
+		h += permits->permits[i].access;
+	}
 
-	if (permits)
-		call_rcu(&permits->rcu, afs_zap_permits);
-	_leave("");
+	permits->h = h;
 }
 
 /*
- * add the result obtained for a vnode to its or its parent directory's cache
- * for the key used to access it
+ * Cache the CallerAccess result obtained from doing a fileserver operation
+ * that returned a vnode status for a particular key.  If a callback break
+ * occurs whilst the operation was in progress then we have to ditch the cache
+ * as the ACL *may* have changed.
  */
-void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
+		      unsigned int cb_break, struct afs_status_cb *scb)
 {
-	struct afs_permits *permits, *xpermits;
-	struct afs_permit *permit;
-	struct afs_vnode *auth_vnode;
-	int count, loop;
-
-	_enter("{%x:%u},%x,%lx",
-	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
-
-	auth_vnode = afs_get_auth_inode(vnode, key);
-	if (IS_ERR(auth_vnode)) {
-		_leave(" [get error %ld]", PTR_ERR(auth_vnode));
-		return;
+	struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
+	afs_access_t caller_access = scb->status.caller_access;
+	size_t size = 0;
+	bool changed = false;
+	int i, j;
+
+	_enter("{%llx:%llu},%x,%x",
+	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
+
+	rcu_read_lock();
+
+	/* Check for the common case first: We got back the same access as last
+	 * time we tried and already have it recorded.
+	 */
+	permits = rcu_dereference(vnode->permit_cache);
+	if (permits) {
+		if (!permits->invalidated) {
+			for (i = 0; i < permits->nr_permits; i++) {
+				if (permits->permits[i].key < key)
+					continue;
+				if (permits->permits[i].key > key)
+					break;
+				if (permits->permits[i].access != caller_access) {
+					changed = true;
+					break;
+				}
+
+				if (afs_cb_is_broken(cb_break, vnode)) {
+					changed = true;
+					break;
+				}
+
+				/* The cache is still good. */
+				rcu_read_unlock();
+				return;
+			}
+		}
+
+		changed |= permits->invalidated;
+		size = permits->nr_permits;
+
+		/* If this set of permits is now wrong, clear the permits
+		 * pointer so that no one tries to use the stale information.
+		 */
+		if (changed) {
+			spin_lock(&vnode->lock);
+			if (permits != rcu_access_pointer(vnode->permit_cache))
+				goto someone_else_changed_it_unlock;
+			RCU_INIT_POINTER(vnode->permit_cache, NULL);
+			spin_unlock(&vnode->lock);
+
+			afs_put_permits(permits);
+			permits = NULL;
+			size = 0;
+		}
 	}
 
-	mutex_lock(&auth_vnode->permits_lock);
+	if (afs_cb_is_broken(cb_break, vnode))
+		goto someone_else_changed_it;
+
+	/* We need a ref on any permits list we want to copy as we'll have to
+	 * drop the lock to do memory allocation.
+	 */
+	if (permits && !refcount_inc_not_zero(&permits->usage))
+		goto someone_else_changed_it;
+
+	rcu_read_unlock();
+
+	/* Speculatively create a new list with the revised permission set.  We
+	 * discard this if we find an extant match already in the hash, but
+	 * it's easier to compare with memcmp this way.
+	 *
+	 * We fill in the key pointers at this time, but we don't get the refs
+	 * yet.
+	 */
+	size++;
+	new = kzalloc(struct_size(new, permits, size), GFP_NOFS);
+	if (!new)
+		goto out_put;
+
+	refcount_set(&new->usage, 1);
+	new->nr_permits = size;
+	i = j = 0;
+	if (permits) {
+		for (i = 0; i < permits->nr_permits; i++) {
+			if (j == i && permits->permits[i].key > key) {
+				new->permits[j].key = key;
+				new->permits[j].access = caller_access;
+				j++;
+			}
+			new->permits[j].key = permits->permits[i].key;
+			new->permits[j].access = permits->permits[i].access;
+			j++;
+		}
+	}
 
-	/* guard against a rename being detected whilst we waited for the
-	 * lock */
-	if (memcmp(&auth_vnode->fid, &vnode->status.parent,
-		   sizeof(struct afs_fid)) != 0) {
-		_debug("renamed");
-		goto out_unlock;
+	if (j == i) {
+		new->permits[j].key = key;
+		new->permits[j].access = caller_access;
 	}
 
-	/* have to be careful as the directory's callback may be broken between
-	 * us receiving the status we're trying to cache and us getting the
-	 * lock to update the cache for the status */
-	if (auth_vnode->acl_order - acl_order > 0) {
-		_debug("ACL changed?");
-		goto out_unlock;
+	afs_hash_permits(new);
+
+	/* Now see if the permit list we want is actually already available */
+	spin_lock(&afs_permits_lock);
+
+	hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
+		if (xpermits->h != new->h ||
+		    xpermits->invalidated ||
+		    xpermits->nr_permits != new->nr_permits ||
+		    memcmp(xpermits->permits, new->permits,
+			   new->nr_permits * sizeof(struct afs_permit)) != 0)
+			continue;
+
+		if (refcount_inc_not_zero(&xpermits->usage)) {
+			replacement = xpermits;
+			goto found;
+		}
+
+		break;
 	}
 
-	/* always update the anonymous mask */
-	_debug("anon access %x", vnode->status.anon_access);
-	auth_vnode->status.anon_access = vnode->status.anon_access;
-	if (key == vnode->volume->cell->anonymous_key)
-		goto out_unlock;
-
-	xpermits = auth_vnode->permits;
-	count = 0;
-	if (xpermits) {
-		/* see if the permit is already in the list
-		 * - if it is then we just amend the list
-		 */
-		count = xpermits->count;
-		permit = xpermits->permits;
-		for (loop = count; loop > 0; loop--) {
-			if (permit->key == key) {
-				permit->access_mask =
-					vnode->status.caller_access;
-				goto out_unlock;
-			}
-			permit++;
+	for (i = 0; i < new->nr_permits; i++)
+		key_get(new->permits[i].key);
+	hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
+	replacement = new;
+	new = NULL;
+
+found:
+	spin_unlock(&afs_permits_lock);
+
+	kfree(new);
+
+	rcu_read_lock();
+	spin_lock(&vnode->lock);
+	zap = rcu_access_pointer(vnode->permit_cache);
+	if (!afs_cb_is_broken(cb_break, vnode) && zap == permits)
+		rcu_assign_pointer(vnode->permit_cache, replacement);
+	else
+		zap = replacement;
+	spin_unlock(&vnode->lock);
+	rcu_read_unlock();
+	afs_put_permits(zap);
+out_put:
+	afs_put_permits(permits);
+	return;
+
+someone_else_changed_it_unlock:
+	spin_unlock(&vnode->lock);
+someone_else_changed_it:
+	/* Someone else changed the cache under us - don't recheck at this
+	 * time.
+	 */
+	rcu_read_unlock();
+	return;
+}
+
+static bool afs_check_permit_rcu(struct afs_vnode *vnode, struct key *key,
+				 afs_access_t *_access)
+{
+	const struct afs_permits *permits;
+	int i;
+
+	_enter("{%llx:%llu},%x",
+	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
+
+	/* check the permits to see if we've got one yet */
+	if (key == vnode->volume->cell->anonymous_key) {
+		*_access = vnode->status.anon_access;
+		_leave(" = t [anon %x]", *_access);
+		return true;
+	}
+
+	permits = rcu_dereference(vnode->permit_cache);
+	if (permits) {
+		for (i = 0; i < permits->nr_permits; i++) {
+			if (permits->permits[i].key < key)
+				continue;
+			if (permits->permits[i].key > key)
+				break;
+
+			*_access = permits->permits[i].access;
+			_leave(" = %u [perm %x]", !permits->invalidated, *_access);
+			return !permits->invalidated;
 		}
 	}
 
-	permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
-			  GFP_NOFS);
-	if (!permits)
-		goto out_unlock;
-
-	if (xpermits)
-		memcpy(permits->permits, xpermits->permits,
-			count * sizeof(struct afs_permit));
-
-	_debug("key %x access %x",
-	       key_serial(key), vnode->status.caller_access);
-	permits->permits[count].access_mask = vnode->status.caller_access;
-	permits->permits[count].key = key_get(key);
-	permits->count = count + 1;
-
-	rcu_assign_pointer(auth_vnode->permits, permits);
-	if (xpermits)
-		call_rcu(&xpermits->rcu, afs_dispose_of_permits);
-
-out_unlock:
-	mutex_unlock(&auth_vnode->permits_lock);
-	iput(&auth_vnode->vfs_inode);
-	_leave("");
+	_leave(" = f");
+	return false;
 }
 
 /*
@@ -214,68 +372,53 @@ out_unlock:
  * permitted to be accessed with this authorisation, and if so, what access it
  * is granted
  */
-static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
-			    afs_access_t *_access)
+int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+		     afs_access_t *_access)
 {
 	struct afs_permits *permits;
-	struct afs_permit *permit;
-	struct afs_vnode *auth_vnode;
-	bool valid;
-	int loop, ret;
+	bool valid = false;
+	int i, ret;
 
-	_enter("{%x:%u},%x",
+	_enter("{%llx:%llu},%x",
 	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
-	auth_vnode = afs_get_auth_inode(vnode, key);
-	if (IS_ERR(auth_vnode)) {
-		*_access = 0;
-		_leave(" = %ld", PTR_ERR(auth_vnode));
-		return PTR_ERR(auth_vnode);
-	}
-
-	ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
-
 	/* check the permits to see if we've got one yet */
-	if (key == auth_vnode->volume->cell->anonymous_key) {
+	if (key == vnode->volume->cell->anonymous_key) {
 		_debug("anon");
-		*_access = auth_vnode->status.anon_access;
+		*_access = vnode->status.anon_access;
 		valid = true;
 	} else {
-		valid = false;
 		rcu_read_lock();
-		permits = rcu_dereference(auth_vnode->permits);
+		permits = rcu_dereference(vnode->permit_cache);
 		if (permits) {
-			permit = permits->permits;
-			for (loop = permits->count; loop > 0; loop--) {
-				if (permit->key == key) {
-					_debug("found in cache");
-					*_access = permit->access_mask;
-					valid = true;
+			for (i = 0; i < permits->nr_permits; i++) {
+				if (permits->permits[i].key < key)
+					continue;
+				if (permits->permits[i].key > key)
 					break;
-				}
-				permit++;
+
+				*_access = permits->permits[i].access;
+				valid = !permits->invalidated;
+				break;
 			}
 		}
 		rcu_read_unlock();
 	}
 
 	if (!valid) {
-		/* check the status on the file we're actually interested in
-		 * (the post-processing will cache the result on auth_vnode) */
+		/* Check the status on the file we're actually interested in
+		 * (the post-processing will cache the result).
+		 */
 		_debug("no valid permit");
 
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
+		ret = afs_fetch_status(vnode, key, false, _access);
 		if (ret < 0) {
-			iput(&auth_vnode->vfs_inode);
 			*_access = 0;
 			_leave(" = %d", ret);
 			return ret;
 		}
-		*_access = vnode->status.caller_access;
 	}
 
-	iput(&auth_vnode->vfs_inode);
 	_leave(" = 0 [access %x]", *_access);
 	return 0;
 }
@@ -285,72 +428,77 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
  * - AFS ACLs are attached to directories only, and a file is controlled by its
  *   parent directory's ACL
  */
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct mnt_idmap *idmap, struct inode *inode,
+		   int mask)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
-	afs_access_t uninitialized_var(access);
+	afs_access_t access;
 	struct key *key;
-	int ret;
+	int ret = 0;
 
-	if (mask & MAY_NOT_BLOCK)
-		return -ECHILD;
-
-	_enter("{{%x:%u},%lx},%x,",
+	_enter("{{%llx:%llu},%lx},%x,",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
-	key = afs_request_key(vnode->volume->cell);
-	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		return PTR_ERR(key);
-	}
+	if (mask & MAY_NOT_BLOCK) {
+		key = afs_request_key_rcu(vnode->volume->cell);
+		if (IS_ERR_OR_NULL(key))
+			return -ECHILD;
+
+		ret = -ECHILD;
+		if (!afs_check_validity(vnode) ||
+		    !afs_check_permit_rcu(vnode, key, &access))
+			goto error;
+	} else {
+		key = afs_request_key(vnode->volume->cell);
+		if (IS_ERR(key)) {
+			_leave(" = %ld [key]", PTR_ERR(key));
+			return PTR_ERR(key);
+		}
 
-	/* if the promise has expired, we need to check the server again */
-	if (!vnode->cb_promised) {
-		_debug("not promised");
-		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		ret = afs_validate(vnode, key);
 		if (ret < 0)
 			goto error;
-		_debug("new promise [fl=%lx]", vnode->flags);
-	}
 
-	/* check the permits to see if we've got one yet */
-	ret = afs_check_permit(vnode, key, &access);
-	if (ret < 0)
-		goto error;
+		/* check the permits to see if we've got one yet */
+		ret = afs_check_permit(vnode, key, &access);
+		if (ret < 0)
+			goto error;
+	}
 
 	/* interpret the access mask */
 	_debug("REQ %x ACC %x on %s",
 	       mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
 
+	ret = 0;
 	if (S_ISDIR(inode->i_mode)) {
-		if (mask & MAY_EXEC) {
+		if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
 			if (!(access & AFS_ACE_LOOKUP))
 				goto permission_denied;
-		} else if (mask & MAY_READ) {
-			if (!(access & AFS_ACE_READ))
-				goto permission_denied;
-		} else if (mask & MAY_WRITE) {
+		}
+		if (mask & MAY_WRITE) {
 			if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
-					AFS_ACE_INSERT | /* create, mkdir, symlink, rename to */
-					AFS_ACE_WRITE))) /* chmod */
+					AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
 				goto permission_denied;
-		} else {
-			BUG();
 		}
 	} else {
 		if (!(access & AFS_ACE_LOOKUP))
 			goto permission_denied;
+		if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+			goto permission_denied;
 		if (mask & (MAY_EXEC | MAY_READ)) {
 			if (!(access & AFS_ACE_READ))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IRUSR))
+				goto permission_denied;
 		} else if (mask & MAY_WRITE) {
 			if (!(access & AFS_ACE_WRITE))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IWUSR))
+				goto permission_denied;
 		}
 	}
 
 	key_put(key);
-	ret = generic_permission(inode, mask);
 	_leave(" = %d", ret);
 	return ret;
 
@@ -361,3 +509,12 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 }
+
+void __exit afs_clean_up_permit_cache(void)
+{
+	int i;
+
+	for (i = 0; i < HASH_SIZE(afs_permits_cache); i++)
+		WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i]));
+
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f342acf3547d..c4428ebddb1d 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -1,322 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS server record management
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include "afs_fs.h"
 #include "internal.h"
+#include "protocol_yfs.h"
 
-static unsigned afs_server_timeout = 10;	/* server timeout in seconds */
+static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
+static atomic_t afs_server_debug_id;
 
-static void afs_reap_server(struct work_struct *);
+static void __afs_put_server(struct afs_net *, struct afs_server *);
+static void afs_server_timer(struct timer_list *timer);
+static void afs_server_destroyer(struct work_struct *work);
 
-/* tree of all the servers, indexed by IP address */
-static struct rb_root afs_servers = RB_ROOT;
-static DEFINE_RWLOCK(afs_servers_lock);
+/*
+ * Find a server by one of its addresses.
+ */
+struct afs_server *afs_find_server(const struct rxrpc_peer *peer)
+{
+	struct afs_server *server = (struct afs_server *)rxrpc_kernel_get_peer_data(peer);
 
-/* LRU list of all the servers not currently in use */
-static LIST_HEAD(afs_server_graveyard);
-static DEFINE_SPINLOCK(afs_server_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
+	if (!server)
+		return NULL;
+	return afs_use_server(server, false, afs_server_trace_use_cm_call);
+}
 
 /*
- * install a server record in the master tree
+ * Look up a server by its UUID and mark it active.  The caller must hold
+ * cell->fs_lock.
  */
-static int afs_install_server(struct afs_server *server)
+static struct afs_server *afs_find_server_by_uuid(struct afs_cell *cell, const uuid_t *uuid)
 {
-	struct afs_server *xserver;
-	struct rb_node **pp, *p;
-	int ret;
+	struct afs_server *server;
+	struct rb_node *p;
+	int diff;
 
-	_enter("%p", server);
+	_enter("%pU", uuid);
 
-	write_lock(&afs_servers_lock);
+	p = cell->fs_servers.rb_node;
+	while (p) {
+		server = rb_entry(p, struct afs_server, uuid_rb);
 
-	ret = -EEXIST;
-	pp = &afs_servers.rb_node;
+		diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+		if (diff < 0) {
+			p = p->rb_left;
+		} else if (diff > 0) {
+			p = p->rb_right;
+		} else {
+			if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags))
+				return NULL; /* Need a write lock */
+			afs_use_server(server, true, afs_server_trace_use_by_uuid);
+			return server;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Install a server record in the cell tree.  The caller must hold an exclusive
+ * lock on cell->fs_lock.
+ */
+static struct afs_server *afs_install_server(struct afs_cell *cell,
+					     struct afs_server **candidate)
+{
+	struct afs_server *server;
+	struct afs_net *net = cell->net;
+	struct rb_node **pp, *p;
+	int diff;
+
+	_enter("%p", candidate);
+
+	/* Firstly install the server in the UUID lookup tree */
+	pp = &cell->fs_servers.rb_node;
 	p = NULL;
 	while (*pp) {
 		p = *pp;
 		_debug("- consider %p", p);
-		xserver = rb_entry(p, struct afs_server, master_rb);
-		if (server->addr.s_addr < xserver->addr.s_addr)
+		server = rb_entry(p, struct afs_server, uuid_rb);
+		diff = memcmp(&(*candidate)->uuid, &server->uuid, sizeof(uuid_t));
+		if (diff < 0)
 			pp = &(*pp)->rb_left;
-		else if (server->addr.s_addr > xserver->addr.s_addr)
+		else if (diff > 0)
 			pp = &(*pp)->rb_right;
 		else
-			goto error;
+			goto exists;
 	}
 
-	rb_link_node(&server->master_rb, p, pp);
-	rb_insert_color(&server->master_rb, &afs_servers);
-	ret = 0;
+	server = *candidate;
+	*candidate = NULL;
+	rb_link_node(&server->uuid_rb, p, pp);
+	rb_insert_color(&server->uuid_rb, &cell->fs_servers);
+	write_seqlock(&net->fs_lock);
+	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+	write_sequnlock(&net->fs_lock);
+
+	afs_get_cell(cell, afs_cell_trace_get_server);
 
-error:
-	write_unlock(&afs_servers_lock);
-	return ret;
+exists:
+	afs_use_server(server, true, afs_server_trace_use_install);
+	return server;
 }
 
 /*
- * allocate a new server record
+ * Allocate a new server record and mark it as active but uncreated.
  */
-static struct afs_server *afs_alloc_server(struct afs_cell *cell,
-					   const struct in_addr *addr)
+static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid)
 {
 	struct afs_server *server;
+	struct afs_net *net = cell->net;
 
 	_enter("");
 
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
-	if (server) {
-		atomic_set(&server->usage, 1);
-		server->cell = cell;
-
-		INIT_LIST_HEAD(&server->link);
-		INIT_LIST_HEAD(&server->grave);
-		init_rwsem(&server->sem);
-		spin_lock_init(&server->fs_lock);
-		server->fs_vnodes = RB_ROOT;
-		server->cb_promises = RB_ROOT;
-		spin_lock_init(&server->cb_lock);
-		init_waitqueue_head(&server->cb_break_waitq);
-		INIT_DELAYED_WORK(&server->cb_break_work,
-				  afs_dispatch_give_up_callbacks);
-
-		memcpy(&server->addr, addr, sizeof(struct in_addr));
-		server->addr.s_addr = addr->s_addr;
-		_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	} else {
-		_leave(" = NULL [nomem]");
-	}
+	if (!server)
+		return NULL;
+
+	refcount_set(&server->ref, 1);
+	atomic_set(&server->active, 0);
+	__set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+	server->debug_id = atomic_inc_return(&afs_server_debug_id);
+	server->uuid = *uuid;
+	rwlock_init(&server->fs_lock);
+	INIT_WORK(&server->destroyer, &afs_server_destroyer);
+	timer_setup(&server->timer, afs_server_timer, 0);
+	INIT_LIST_HEAD(&server->volumes);
+	init_waitqueue_head(&server->probe_wq);
+	mutex_init(&server->cm_token_lock);
+	INIT_LIST_HEAD(&server->probe_link);
+	INIT_HLIST_NODE(&server->proc_link);
+	spin_lock_init(&server->probe_lock);
+	server->cell = cell;
+	server->rtt = UINT_MAX;
+	server->service_id = FS_SERVICE;
+	server->probe_counter = 1;
+	server->probed_at = jiffies - LONG_MAX / 2;
+
+	afs_inc_servers_outstanding(net);
+	_leave(" = %p", server);
 	return server;
 }
 
 /*
- * get an FS-server record for a cell
+ * Look up an address record for a server
  */
-struct afs_server *afs_lookup_server(struct afs_cell *cell,
-				     const struct in_addr *addr)
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_server *server,
+						 struct key *key)
 {
-	struct afs_server *server, *candidate;
+	struct afs_vl_cursor vc;
+	struct afs_addr_list *alist = NULL;
+	int ret;
+
+	ret = -ERESTARTSYS;
+	if (afs_begin_vlserver_operation(&vc, server->cell, key)) {
+		while (afs_select_vlserver(&vc)) {
+			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
+				alist = afs_yfsvl_get_endpoints(&vc, &server->uuid);
+			else
+				alist = afs_vl_get_addrs_u(&vc, &server->uuid);
+		}
 
-	_enter("%p,%pI4", cell, &addr->s_addr);
+		ret = afs_end_vlserver_operation(&vc);
+	}
 
-	/* quick scan of the list to see if we already have the server */
-	read_lock(&cell->servers_lock);
+	return ret < 0 ? ERR_PTR(ret) : alist;
+}
 
-	list_for_each_entry(server, &cell->servers, link) {
-		if (server->addr.s_addr == addr->s_addr)
-			goto found_server_quickly;
+/*
+ * Get or create a fileserver record and return it with an active-use count on
+ * it.
+ */
+struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
+				     const uuid_t *uuid, u32 addr_version)
+{
+	struct afs_addr_list *alist = NULL;
+	struct afs_server *server, *candidate = NULL;
+	bool creating = false;
+	int ret;
+
+	_enter("%p,%pU", cell->net, uuid);
+
+	down_read(&cell->fs_lock);
+	server = afs_find_server_by_uuid(cell, uuid);
+	/* Won't see servers marked uncreated. */
+	up_read(&cell->fs_lock);
+
+	if (server) {
+		timer_delete_sync(&server->timer);
+		if (test_bit(AFS_SERVER_FL_CREATING, &server->flags))
+			goto wait_for_creation;
+		if (server->addr_version != addr_version)
+			set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
+		return server;
 	}
-	read_unlock(&cell->servers_lock);
 
-	candidate = afs_alloc_server(cell, addr);
+	candidate = afs_alloc_server(cell, uuid);
 	if (!candidate) {
-		_leave(" = -ENOMEM");
+		afs_put_addrlist(alist, afs_alist_trace_put_server_oom);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	write_lock(&cell->servers_lock);
+	down_write(&cell->fs_lock);
+	server = afs_install_server(cell, &candidate);
+	if (test_bit(AFS_SERVER_FL_CREATING, &server->flags)) {
+		/* We need to wait for creation to complete. */
+		up_write(&cell->fs_lock);
+		goto wait_for_creation;
+	}
+	if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+		set_bit(AFS_SERVER_FL_CREATING, &server->flags);
+		clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+		creating = true;
+	}
+	up_write(&cell->fs_lock);
+	timer_delete_sync(&server->timer);
+
+	/* If we get to create the server, we look up the addresses and then
+	 * immediately dispatch an asynchronous probe to each interface on the
+	 * fileserver.  This will make sure the repeat-probing service is
+	 * started.
+	 */
+	if (creating) {
+		alist = afs_vl_lookup_addrs(server, key);
+		if (IS_ERR(alist)) {
+			ret = PTR_ERR(alist);
+			goto create_failed;
+		}
+
+		ret = afs_fs_probe_fileserver(cell->net, server, alist, key);
+		if (ret)
+			goto create_failed;
+
+		clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
+	}
 
-	/* check the cell's server list again */
-	list_for_each_entry(server, &cell->servers, link) {
-		if (server->addr.s_addr == addr->s_addr)
-			goto found_server;
+out:
+	afs_put_addrlist(alist, afs_alist_trace_put_server_create);
+	if (candidate) {
+		kfree(rcu_access_pointer(server->endpoint_state));
+		kfree(candidate);
+		afs_dec_servers_outstanding(cell->net);
+	}
+	return server ?: ERR_PTR(ret);
+
+wait_for_creation:
+	afs_see_server(server, afs_server_trace_wait_create);
+	wait_on_bit(&server->flags, AFS_SERVER_FL_CREATING, TASK_UNINTERRUPTIBLE);
+	if (test_bit_acquire(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+		/* Barrier: read flag before error */
+		ret = READ_ONCE(server->create_error);
+		afs_put_server(cell->net, server, afs_server_trace_unuse_create_fail);
+		server = NULL;
+		goto out;
 	}
 
-	_debug("new");
-	server = candidate;
-	if (afs_install_server(server) < 0)
-		goto server_in_two_cells;
+	ret = 0;
+	goto out;
 
-	afs_get_cell(cell);
-	list_add_tail(&server->link, &cell->servers);
+create_failed:
+	down_write(&cell->fs_lock);
 
-	write_unlock(&cell->servers_lock);
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	return server;
+	WRITE_ONCE(server->create_error, ret);
+	smp_wmb(); /* Barrier: set error before flag. */
+	set_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+
+	clear_and_wake_up_bit(AFS_SERVER_FL_CREATING, &server->flags);
 
-	/* found a matching server quickly */
-found_server_quickly:
-	_debug("found quickly");
-	afs_get_server(server);
-	read_unlock(&cell->servers_lock);
-no_longer_unused:
-	if (!list_empty(&server->grave)) {
-		spin_lock(&afs_server_graveyard_lock);
-		list_del_init(&server->grave);
-		spin_unlock(&afs_server_graveyard_lock);
+	if (test_bit(AFS_SERVER_FL_UNCREATED, &server->flags)) {
+		clear_bit(AFS_SERVER_FL_UNCREATED, &server->flags);
+		creating = true;
 	}
-	_leave(" = %p{%d}", server, atomic_read(&server->usage));
-	return server;
+	afs_unuse_server(cell->net, server, afs_server_trace_unuse_create_fail);
+	server = NULL;
 
-	/* found a matching server on the second pass */
-found_server:
-	_debug("found");
-	afs_get_server(server);
-	write_unlock(&cell->servers_lock);
-	kfree(candidate);
-	goto no_longer_unused;
-
-	/* found a server that seems to be in two cells */
-server_in_two_cells:
-	write_unlock(&cell->servers_lock);
-	kfree(candidate);
-	printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
-	       addr);
-	_leave(" = -EEXIST");
-	return ERR_PTR(-EEXIST);
+	up_write(&cell->fs_lock);
+	goto out;
 }
 
 /*
- * look up a server by its IP address
+ * Set/reduce a server's timer.
  */
-struct afs_server *afs_find_server(const struct in_addr *_addr)
+static void afs_set_server_timer(struct afs_server *server, unsigned int delay_secs)
 {
-	struct afs_server *server = NULL;
-	struct rb_node *p;
-	struct in_addr addr = *_addr;
-
-	_enter("%pI4", &addr.s_addr);
+	mod_timer(&server->timer, jiffies + delay_secs * HZ);
+}
 
-	read_lock(&afs_servers_lock);
+/*
+ * Get a reference on a server object.
+ */
+struct afs_server *afs_get_server(struct afs_server *server,
+				  enum afs_server_trace reason)
+{
+	unsigned int a;
+	int r;
 
-	p = afs_servers.rb_node;
-	while (p) {
-		server = rb_entry(p, struct afs_server, master_rb);
+	__refcount_inc(&server->ref, &r);
+	a = atomic_read(&server->active);
+	trace_afs_server(server->debug_id, r + 1, a, reason);
+	return server;
+}
 
-		_debug("- consider %p", p);
+/*
+ * Get an active count on a server object and maybe remove from the inactive
+ * list.
+ */
+struct afs_server *afs_use_server(struct afs_server *server, bool activate,
+				  enum afs_server_trace reason)
+{
+	unsigned int a;
+	int r;
 
-		if (addr.s_addr < server->addr.s_addr) {
-			p = p->rb_left;
-		} else if (addr.s_addr > server->addr.s_addr) {
-			p = p->rb_right;
-		} else {
-			afs_get_server(server);
-			goto found;
-		}
-	}
+	__refcount_inc(&server->ref, &r);
+	a = atomic_inc_return(&server->active);
+	if (a == 1 && activate &&
+	    !test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+		timer_delete(&server->timer);
 
-	server = NULL;
-found:
-	read_unlock(&afs_servers_lock);
-	ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
-	_leave(" = %p", server);
+	trace_afs_server(server->debug_id, r + 1, a, reason);
 	return server;
 }
 
 /*
- * destroy a server record
- * - removes from the cell list
+ * Release a reference on a server record.
  */
-void afs_put_server(struct afs_server *server)
+void afs_put_server(struct afs_net *net, struct afs_server *server,
+		    enum afs_server_trace reason)
 {
+	unsigned int a, debug_id;
+	bool zero;
+	int r;
+
 	if (!server)
 		return;
 
-	_enter("%p{%d}", server, atomic_read(&server->usage));
+	debug_id = server->debug_id;
+	a = atomic_read(&server->active);
+	zero = __refcount_dec_and_test(&server->ref, &r);
+	trace_afs_server(debug_id, r - 1, a, reason);
+	if (unlikely(zero))
+		__afs_put_server(net, server);
+}
 
-	_debug("PUT SERVER %d", atomic_read(&server->usage));
+/*
+ * Drop an active count on a server object without updating the last-unused
+ * time.
+ */
+void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
+			     enum afs_server_trace reason)
+{
+	if (!server)
+		return;
+
+	if (atomic_dec_and_test(&server->active)) {
+		if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) ||
+		    READ_ONCE(server->cell->state) >= AFS_CELL_REMOVING)
+			schedule_work(&server->destroyer);
+	}
 
-	ASSERTCMP(atomic_read(&server->usage), >, 0);
+	afs_put_server(net, server, reason);
+}
 
-	if (likely(!atomic_dec_and_test(&server->usage))) {
-		_leave("");
+/*
+ * Drop an active count on a server object.
+ */
+void afs_unuse_server(struct afs_net *net, struct afs_server *server,
+		      enum afs_server_trace reason)
+{
+	if (!server)
 		return;
+
+	if (atomic_dec_and_test(&server->active)) {
+		if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags) &&
+		    READ_ONCE(server->cell->state) < AFS_CELL_REMOVING) {
+			time64_t unuse_time = ktime_get_real_seconds();
+
+			server->unuse_time = unuse_time;
+			afs_set_server_timer(server, afs_server_gc_delay);
+		} else {
+			schedule_work(&server->destroyer);
+		}
 	}
 
-	afs_flush_callback_breaks(server);
+	afs_put_server(net, server, reason);
+}
+
+static void afs_server_rcu(struct rcu_head *rcu)
+{
+	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+
+	trace_afs_server(server->debug_id, refcount_read(&server->ref),
+			 atomic_read(&server->active), afs_server_trace_free);
+	afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state),
+			       afs_estate_trace_put_server);
+	afs_put_cell(server->cell, afs_cell_trace_put_server);
+	kfree(server->cm_rxgk_appdata.data);
+	kfree(server);
+}
+
+static void __afs_put_server(struct afs_net *net, struct afs_server *server)
+{
+	call_rcu(&server->rcu, afs_server_rcu);
+	afs_dec_servers_outstanding(net);
+}
+
+static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server)
+{
+	struct afs_endpoint_state *estate = rcu_access_pointer(server->endpoint_state);
+	struct afs_addr_list *alist = estate->addresses;
+
+	afs_fs_give_up_all_callbacks(net, server, &alist->addrs[alist->preferred], NULL);
+}
+
+/*
+ * Check to see if the server record has expired.
+ */
+static bool afs_has_server_expired(const struct afs_server *server)
+{
+	time64_t expires_at;
+
+	if (atomic_read(&server->active))
+		return false;
 
-	spin_lock(&afs_server_graveyard_lock);
-	if (atomic_read(&server->usage) == 0) {
-		list_move_tail(&server->grave, &afs_server_graveyard);
-		server->time_of_death = get_seconds();
-		queue_delayed_work(afs_wq, &afs_server_reaper,
-				   afs_server_timeout * HZ);
+	if (server->cell->net->live ||
+	    server->cell->state >= AFS_CELL_REMOVING) {
+		trace_afs_server(server->debug_id, refcount_read(&server->ref),
+				 0, afs_server_trace_purging);
+		return true;
 	}
-	spin_unlock(&afs_server_graveyard_lock);
-	_leave(" [dead]");
+
+	expires_at = server->unuse_time;
+	if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+	    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+		expires_at += afs_server_gc_delay;
+
+	return ktime_get_real_seconds() > expires_at;
 }
 
 /*
- * destroy a dead server
+ * Remove a server record from it's parent cell's database.
  */
-static void afs_destroy_server(struct afs_server *server)
+static bool afs_remove_server_from_cell(struct afs_server *server)
 {
-	_enter("%p", server);
+	struct afs_cell *cell = server->cell;
+
+	down_write(&cell->fs_lock);
 
-	ASSERTIF(server->cb_break_head != server->cb_break_tail,
-		 delayed_work_pending(&server->cb_break_work));
+	if (!afs_has_server_expired(server)) {
+		up_write(&cell->fs_lock);
+		return false;
+	}
 
-	ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
-	ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
-	ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
-	ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
+	set_bit(AFS_SERVER_FL_EXPIRED, &server->flags);
+	_debug("expire %pU %u", &server->uuid, atomic_read(&server->active));
+	afs_see_server(server, afs_server_trace_see_expired);
+	rb_erase(&server->uuid_rb, &cell->fs_servers);
+	up_write(&cell->fs_lock);
+	return true;
+}
 
-	afs_put_cell(server->cell);
-	kfree(server);
+static void afs_server_destroyer(struct work_struct *work)
+{
+	struct afs_endpoint_state *estate;
+	struct afs_server *server = container_of(work, struct afs_server, destroyer);
+	struct afs_net *net = server->cell->net;
+
+	afs_see_server(server, afs_server_trace_see_destroyer);
+
+	if (test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+		return;
+
+	if (!afs_remove_server_from_cell(server))
+		return;
+
+	timer_shutdown_sync(&server->timer);
+	cancel_work(&server->destroyer);
+
+	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
+		afs_give_up_callbacks(net, server);
+
+	/* Unbind the rxrpc_peer records from the server. */
+	estate = rcu_access_pointer(server->endpoint_state);
+	if (estate)
+		afs_set_peer_appdata(server, estate->addresses, NULL);
+
+	write_seqlock(&net->fs_lock);
+	list_del_init(&server->probe_link);
+	if (!hlist_unhashed(&server->proc_link))
+		hlist_del_rcu(&server->proc_link);
+	write_sequnlock(&net->fs_lock);
+
+	afs_put_server(net, server, afs_server_trace_destroy);
+}
+
+static void afs_server_timer(struct timer_list *timer)
+{
+	struct afs_server *server = container_of(timer, struct afs_server, timer);
+
+	afs_see_server(server, afs_server_trace_see_timer);
+	if (!test_bit(AFS_SERVER_FL_EXPIRED, &server->flags))
+		schedule_work(&server->destroyer);
 }
 
 /*
- * reap dead server records
+ * Wake up all the servers in a cell so that they can purge themselves.
  */
-static void afs_reap_server(struct work_struct *work)
+void afs_purge_servers(struct afs_cell *cell)
 {
-	LIST_HEAD(corpses);
 	struct afs_server *server;
-	unsigned long delay, expiry;
-	time_t now;
-
-	now = get_seconds();
-	spin_lock(&afs_server_graveyard_lock);
-
-	while (!list_empty(&afs_server_graveyard)) {
-		server = list_entry(afs_server_graveyard.next,
-				    struct afs_server, grave);
-
-		/* the queue is ordered most dead first */
-		expiry = server->time_of_death + afs_server_timeout;
-		if (expiry > now) {
-			delay = (expiry - now) * HZ;
-			mod_delayed_work(afs_wq, &afs_server_reaper, delay);
-			break;
-		}
+	struct rb_node *rb;
 
-		write_lock(&server->cell->servers_lock);
-		write_lock(&afs_servers_lock);
-		if (atomic_read(&server->usage) > 0) {
-			list_del_init(&server->grave);
-		} else {
-			list_move_tail(&server->grave, &corpses);
-			list_del_init(&server->link);
-			rb_erase(&server->master_rb, &afs_servers);
-		}
-		write_unlock(&afs_servers_lock);
-		write_unlock(&server->cell->servers_lock);
+	down_read(&cell->fs_lock);
+	for (rb = rb_first(&cell->fs_servers); rb; rb = rb_next(rb)) {
+		server = rb_entry(rb, struct afs_server, uuid_rb);
+		afs_see_server(server, afs_server_trace_see_purge);
+		schedule_work(&server->destroyer);
 	}
+	up_read(&cell->fs_lock);
+}
+
+/*
+ * Wait for outstanding servers.
+ */
+void afs_wait_for_servers(struct afs_net *net)
+{
+	_enter("");
+
+	atomic_dec(&net->servers_outstanding);
+	wait_var_event(&net->servers_outstanding,
+		       !atomic_read(&net->servers_outstanding));
+	_leave("");
+}
+
+/*
+ * Get an update for a server's address list.
+ */
+static noinline bool afs_update_server_record(struct afs_operation *op,
+					      struct afs_server *server,
+					      struct key *key)
+{
+	struct afs_endpoint_state *estate;
+	struct afs_addr_list *alist;
+	bool has_addrs;
 
-	spin_unlock(&afs_server_graveyard_lock);
+	_enter("");
 
-	/* now reap the corpses we've extracted */
-	while (!list_empty(&corpses)) {
-		server = list_entry(corpses.next, struct afs_server, grave);
-		list_del(&server->grave);
-		afs_destroy_server(server);
+	trace_afs_server(server->debug_id, refcount_read(&server->ref),
+			 atomic_read(&server->active),
+			 afs_server_trace_update);
+
+	alist = afs_vl_lookup_addrs(server, op->key);
+	if (IS_ERR(alist)) {
+		rcu_read_lock();
+		estate = rcu_dereference(server->endpoint_state);
+		has_addrs = estate->addresses;
+		rcu_read_unlock();
+
+		if ((PTR_ERR(alist) == -ERESTARTSYS ||
+		     PTR_ERR(alist) == -EINTR) &&
+		    (op->flags & AFS_OPERATION_UNINTR) &&
+		    has_addrs) {
+			_leave(" = t [intr]");
+			return true;
+		}
+		afs_op_set_error(op, PTR_ERR(alist));
+		_leave(" = f [%d]", afs_op_error(op));
+		return false;
 	}
+
+	if (server->addr_version != alist->version)
+		afs_fs_probe_fileserver(op->net, server, alist, key);
+
+	afs_put_addrlist(alist, afs_alist_trace_put_server_update);
+	_leave(" = t");
+	return true;
 }
 
 /*
- * discard all the server records for rmmod
+ * See if a server's address list needs updating.
  */
-void __exit afs_purge_servers(void)
+bool afs_check_server_record(struct afs_operation *op, struct afs_server *server,
+			     struct key *key)
 {
-	afs_server_timeout = 0;
-	mod_delayed_work(afs_wq, &afs_server_reaper, 0);
+	bool success;
+	int ret, retries = 0;
+
+	_enter("");
+
+	ASSERT(server);
+
+retry:
+	if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags))
+		goto wait;
+	if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags))
+		goto update;
+	_leave(" = t [good]");
+	return true;
+
+update:
+	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
+		clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
+		success = afs_update_server_record(op, server, key);
+		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
+		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
+		_leave(" = %d", success);
+		return success;
+	}
+
+wait:
+	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
+			  (op->flags & AFS_OPERATION_UNINTR) ?
+			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
+	if (ret == -ERESTARTSYS) {
+		afs_op_set_error(op, ret);
+		_leave(" = f [intr]");
+		return false;
+	}
+
+	retries++;
+	if (retries == 4) {
+		_leave(" = f [stale]");
+		ret = -ESTALE;
+		return false;
+	}
+	goto retry;
 }
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
new file mode 100644
index 000000000000..20d5474837df
--- /dev/null
+++ b/fs/afs/server_list.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS fileserver list management.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
+{
+	int i;
+
+	if (slist && refcount_dec_and_test(&slist->usage)) {
+		for (i = 0; i < slist->nr_servers; i++)
+			afs_unuse_server(net, slist->servers[i].server,
+					 afs_server_trace_unuse_slist);
+		kfree_rcu(slist, rcu);
+	}
+}
+
+/*
+ * Build a server list from a VLDB record.
+ */
+struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
+					      struct key *key,
+					      struct afs_vldb_entry *vldb)
+{
+	struct afs_server_list *slist;
+	struct afs_server *server;
+	unsigned int type_mask = 1 << volume->type;
+	bool use_newrepsites = false;
+	int ret = -ENOMEM, nr_servers = 0, newrep = 0, i, j, usable = 0;
+
+	/* Work out if we're going to restrict to NEWREPSITE-marked servers or
+	 * not.  If at least one site is marked as NEWREPSITE, then it's likely
+	 * that "vos release" is busy updating RO sites.  We cut over from one
+	 * to the other when >=50% of the sites have been updated.  Sites that
+	 * are in the process of being updated are marked DONTUSE.
+	 */
+	for (i = 0; i < vldb->nr_servers; i++) {
+		if (!(vldb->fs_mask[i] & type_mask))
+			continue;
+		nr_servers++;
+		if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+			continue;
+		usable++;
+		if (vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE)
+			newrep++;
+	}
+
+	slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
+	if (!slist)
+		goto error;
+
+	if (newrep) {
+		if (newrep < usable / 2) {
+			slist->ro_replicating = AFS_RO_REPLICATING_USE_OLD;
+		} else {
+			slist->ro_replicating = AFS_RO_REPLICATING_USE_NEW;
+			use_newrepsites = true;
+		}
+	}
+
+	refcount_set(&slist->usage, 1);
+	rwlock_init(&slist->lock);
+
+	/* Make sure a records exists for each server in the list. */
+	for (i = 0; i < vldb->nr_servers; i++) {
+		unsigned long se_flags = 0;
+		bool newrepsite = vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE;
+
+		if (!(vldb->fs_mask[i] & type_mask))
+			continue;
+		if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
+			__set_bit(AFS_SE_EXCLUDED, &se_flags);
+		if (newrep && (newrepsite ^ use_newrepsites))
+			__set_bit(AFS_SE_EXCLUDED, &se_flags);
+
+		server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
+					   vldb->addr_version[i]);
+		if (IS_ERR(server)) {
+			ret = PTR_ERR(server);
+			if (ret == -ENOENT ||
+			    ret == -ENOMEDIUM)
+				continue;
+			goto error_2;
+		}
+
+		/* Insertion-sort by UUID */
+		for (j = 0; j < slist->nr_servers; j++)
+			if (memcmp(&slist->servers[j].server->uuid,
+				   &server->uuid,
+				   sizeof(server->uuid)) >= 0)
+				break;
+		if (j < slist->nr_servers) {
+			if (slist->servers[j].server == server) {
+				afs_unuse_server_notime(volume->cell->net, server,
+							afs_server_trace_unuse_slist_isort);
+				continue;
+			}
+
+			memmove(slist->servers + j + 1,
+				slist->servers + j,
+				(slist->nr_servers - j) * sizeof(struct afs_server_entry));
+		}
+
+		slist->servers[j].server = server;
+		slist->servers[j].volume = volume;
+		slist->servers[j].flags = se_flags;
+		slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE;
+		slist->nr_servers++;
+	}
+
+	if (slist->nr_servers == 0) {
+		ret = -EDESTADDRREQ;
+		goto error_2;
+	}
+
+	return slist;
+
+error_2:
+	afs_put_serverlist(volume->cell->net, slist);
+error:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Copy the annotations from an old server list to its potential replacement.
+ */
+bool afs_annotate_server_list(struct afs_server_list *new,
+			      struct afs_server_list *old)
+{
+	unsigned long mask = 1UL << AFS_SE_EXCLUDED;
+	int i;
+
+	if (old->nr_servers != new->nr_servers ||
+	    old->ro_replicating != new->ro_replicating)
+		goto changed;
+
+	for (i = 0; i < old->nr_servers; i++) {
+		if (old->servers[i].server != new->servers[i].server)
+			goto changed;
+		if ((old->servers[i].flags & mask) != (new->servers[i].flags & mask))
+			goto changed;
+	}
+	return false;
+changed:
+	return true;
+}
+
+/*
+ * Attach a volume to the servers it is going to use.
+ */
+void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+	struct afs_server_entry *se, *pe;
+	struct afs_server *server;
+	struct list_head *p;
+	unsigned int i;
+
+	down_write(&volume->cell->vs_lock);
+
+	for (i = 0; i < slist->nr_servers; i++) {
+		se = &slist->servers[i];
+		server = se->server;
+
+		list_for_each(p, &server->volumes) {
+			pe = list_entry(p, struct afs_server_entry, slink);
+			if (volume->vid <= pe->volume->vid)
+				break;
+		}
+		list_add_tail(&se->slink, p);
+	}
+
+	slist->attached = true;
+	up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Reattach a volume to the servers it is going to use when server list is
+ * replaced.  We try to switch the attachment points to avoid rewalking the
+ * lists.
+ */
+void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
+				    struct afs_server_list *old)
+{
+	unsigned int n = 0, o = 0;
+
+	down_write(&volume->cell->vs_lock);
+
+	while (n < new->nr_servers || o < old->nr_servers) {
+		struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
+		struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
+		struct afs_server_entry *s;
+		struct list_head *p;
+		int diff;
+
+		if (pn && po && pn->server == po->server) {
+			pn->cb_expires_at = po->cb_expires_at;
+			list_replace(&po->slink, &pn->slink);
+			n++;
+			o++;
+			continue;
+		}
+
+		if (pn && po)
+			diff = memcmp(&pn->server->uuid, &po->server->uuid,
+				      sizeof(pn->server->uuid));
+		else
+			diff = pn ? -1 : 1;
+
+		if (diff < 0) {
+			list_for_each(p, &pn->server->volumes) {
+				s = list_entry(p, struct afs_server_entry, slink);
+				if (volume->vid <= s->volume->vid)
+					break;
+			}
+			list_add_tail(&pn->slink, p);
+			n++;
+		} else {
+			list_del(&po->slink);
+			o++;
+		}
+	}
+
+	up_write(&volume->cell->vs_lock);
+}
+
+/*
+ * Detach a volume from the servers it has been using.
+ */
+void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
+{
+	unsigned int i;
+
+	if (!slist->attached)
+		return;
+
+	down_write(&volume->cell->vs_lock);
+
+	for (i = 0; i < slist->nr_servers; i++)
+		list_del(&slist->servers[i].slink);
+
+	slist->attached = false;
+	up_write(&volume->cell->vs_lock);
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c4861557e385..d672b7ab57ae 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,6 +1,6 @@
 /* AFS superblock handling
  *
- * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
  *
  * This software may be freely redistributed under the terms of the
  * GNU General Public License.
@@ -21,58 +21,73 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
 #include <linux/statfs.h>
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
+#include <linux/magic.h>
 #include <net/net_namespace.h>
 #include "internal.h"
 
-#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-
 static void afs_i_init_once(void *foo);
-static struct dentry *afs_mount(struct file_system_type *fs_type,
-		      int flags, const char *dev_name, void *data);
 static void afs_kill_super(struct super_block *sb);
 static struct inode *afs_alloc_inode(struct super_block *sb);
 static void afs_destroy_inode(struct inode *inode);
+static void afs_free_inode(struct inode *inode);
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
+static int afs_show_devname(struct seq_file *m, struct dentry *root);
+static int afs_show_options(struct seq_file *m, struct dentry *root);
+static int afs_init_fs_context(struct fs_context *fc);
+static const struct fs_parameter_spec afs_fs_parameters[];
 
 struct file_system_type afs_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "afs",
-	.mount		= afs_mount,
-	.kill_sb	= afs_kill_super,
-	.fs_flags	= 0,
+	.owner			= THIS_MODULE,
+	.name			= "afs",
+	.init_fs_context	= afs_init_fs_context,
+	.parameters		= afs_fs_parameters,
+	.kill_sb		= afs_kill_super,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE,
 };
 MODULE_ALIAS_FS("afs");
 
+int afs_net_id;
+
 static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
+	.write_inode	= netfs_unpin_writeback,
 	.drop_inode	= afs_drop_inode,
 	.destroy_inode	= afs_destroy_inode,
+	.free_inode	= afs_free_inode,
 	.evict_inode	= afs_evict_inode,
-	.show_options	= generic_show_options,
+	.show_devname	= afs_show_devname,
+	.show_options	= afs_show_options,
 };
 
 static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
 
-enum {
-	afs_no_opt,
-	afs_opt_cell,
-	afs_opt_rwpath,
-	afs_opt_vol,
-	afs_opt_autocell,
+enum afs_param {
+	Opt_autocell,
+	Opt_dyn,
+	Opt_flock,
+	Opt_source,
 };
 
-static const match_table_t afs_options_list = {
-	{ afs_opt_cell,		"cell=%s"	},
-	{ afs_opt_rwpath,	"rwpath"	},
-	{ afs_opt_vol,		"vol=%s"	},
-	{ afs_opt_autocell,	"autocell"	},
-	{ afs_no_opt,		NULL		},
+static const struct constant_table afs_param_flock[] = {
+	{"local",	afs_flock_mode_local },
+	{"openafs",	afs_flock_mode_openafs },
+	{"strict",	afs_flock_mode_strict },
+	{"write",	afs_flock_mode_write },
+	{}
+};
+
+static const struct fs_parameter_spec afs_fs_parameters[] = {
+	fsparam_flag  ("autocell",	Opt_autocell),
+	fsparam_flag  ("dyn",		Opt_dyn),
+	fsparam_enum  ("flock",		Opt_flock, afs_param_flock),
+	fsparam_string("source",	Opt_source),
+	{}
 };
 
 /*
@@ -91,7 +106,7 @@ int __init afs_fs_init(void)
 	afs_inode_cachep = kmem_cache_create("afs_inode_cache",
 					     sizeof(struct afs_vnode),
 					     0,
-					     SLAB_HWCACHE_ALIGN,
+					     SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
 					     afs_i_init_once);
 	if (!afs_inode_cachep) {
 		printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
@@ -113,7 +128,7 @@ int __init afs_fs_init(void)
 /*
  * clean up the filesystem
  */
-void __exit afs_fs_exit(void)
+void afs_fs_exit(void)
 {
 	_enter("");
 
@@ -136,150 +151,265 @@ void __exit afs_fs_exit(void)
 }
 
 /*
- * parse the mount options
- * - this function has been shamelessly adapted from the ext3 fs which
- *   shamelessly adapted it from the msdos fs
+ * Display the mount device name in /proc/mounts.
  */
-static int afs_parse_options(struct afs_mount_params *params,
-			     char *options, const char **devname)
+static int afs_show_devname(struct seq_file *m, struct dentry *root)
 {
-	struct afs_cell *cell;
-	substring_t args[MAX_OPT_ARGS];
-	char *p;
-	int token;
-
-	_enter("%s", options);
-
-	options[PAGE_SIZE - 1] = 0;
-
-	while ((p = strsep(&options, ","))) {
-		if (!*p)
-			continue;
-
-		token = match_token(p, afs_options_list, args);
-		switch (token) {
-		case afs_opt_cell:
-			cell = afs_cell_lookup(args[0].from,
-					       args[0].to - args[0].from,
-					       false);
-			if (IS_ERR(cell))
-				return PTR_ERR(cell);
-			afs_put_cell(params->cell);
-			params->cell = cell;
-			break;
-
-		case afs_opt_rwpath:
-			params->rwpath = 1;
-			break;
-
-		case afs_opt_vol:
-			*devname = args[0].from;
-			break;
-
-		case afs_opt_autocell:
-			params->autocell = 1;
-			break;
-
-		default:
-			printk(KERN_ERR "kAFS:"
-			       " Unknown or invalid mount option: '%s'\n", p);
-			return -EINVAL;
-		}
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	struct afs_volume *volume = as->volume;
+	struct afs_cell *cell = as->cell;
+	const char *suf = "";
+	char pref = '%';
+
+	if (as->dyn_root) {
+		seq_puts(m, "none");
+		return 0;
 	}
 
-	_leave(" = 0");
+	switch (volume->type) {
+	case AFSVL_RWVOL:
+		break;
+	case AFSVL_ROVOL:
+		pref = '#';
+		if (volume->type_force)
+			suf = ".readonly";
+		break;
+	case AFSVL_BACKVOL:
+		pref = '#';
+		suf = ".backup";
+		break;
+	}
+
+	seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf);
 	return 0;
 }
 
 /*
- * parse a device name to get cell name, volume name, volume type and R/W
- * selector
- * - this can be one of the following:
+ * Display the mount options in /proc/mounts.
+ */
+static int afs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct afs_super_info *as = AFS_FS_S(root->d_sb);
+	const char *p = NULL;
+
+	if (as->dyn_root)
+		seq_puts(m, ",dyn");
+	switch (as->flock_mode) {
+	case afs_flock_mode_unset:	break;
+	case afs_flock_mode_local:	p = "local";	break;
+	case afs_flock_mode_openafs:	p = "openafs";	break;
+	case afs_flock_mode_strict:	p = "strict";	break;
+	case afs_flock_mode_write:	p = "write";	break;
+	}
+	if (p)
+		seq_printf(m, ",flock=%s", p);
+
+	return 0;
+}
+
+/*
+ * Parse the source name to get cell name, volume name, volume type and R/W
+ * selector.
+ *
+ * This can be one of the following:
  *	"%[cell:]volume[.]"		R/W volume
- *	"#[cell:]volume[.]"		R/O or R/W volume (rwpath=0),
- *					 or R/W (rwpath=1) volume
+ *	"#[cell:]volume[.]"		R/O or R/W volume (R/O parent),
+ *					 or R/W (R/W parent) volume
  *	"%[cell:]volume.readonly"	R/O volume
  *	"#[cell:]volume.readonly"	R/O volume
  *	"%[cell:]volume.backup"		Backup volume
  *	"#[cell:]volume.backup"		Backup volume
  */
-static int afs_parse_device_name(struct afs_mount_params *params,
-				 const char *name)
+static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
 {
+	struct afs_fs_context *ctx = fc->fs_private;
 	struct afs_cell *cell;
-	const char *cellname, *suffix;
+	const char *cellname, *suffix, *name = param->string;
 	int cellnamesz;
 
 	_enter(",%s", name);
 
+	if (fc->source)
+		return invalf(fc, "kAFS: Multiple sources not supported");
+
 	if (!name) {
 		printk(KERN_ERR "kAFS: no volume name specified\n");
 		return -EINVAL;
 	}
 
 	if ((name[0] != '%' && name[0] != '#') || !name[1]) {
+		/* To use dynroot, we don't want to have to provide a source */
+		if (strcmp(name, "none") == 0) {
+			ctx->no_cell = true;
+			return 0;
+		}
 		printk(KERN_ERR "kAFS: unparsable volume name\n");
 		return -EINVAL;
 	}
 
 	/* determine the type of volume we're looking for */
-	params->type = AFSVL_ROVOL;
-	params->force = false;
-	if (params->rwpath || name[0] == '%') {
-		params->type = AFSVL_RWVOL;
-		params->force = true;
+	if (name[0] == '%') {
+		ctx->type = AFSVL_RWVOL;
+		ctx->force = true;
 	}
 	name++;
 
 	/* split the cell name out if there is one */
-	params->volname = strchr(name, ':');
-	if (params->volname) {
+	ctx->volname = strchr(name, ':');
+	if (ctx->volname) {
 		cellname = name;
-		cellnamesz = params->volname - name;
-		params->volname++;
+		cellnamesz = ctx->volname - name;
+		ctx->volname++;
 	} else {
-		params->volname = name;
+		ctx->volname = name;
 		cellname = NULL;
 		cellnamesz = 0;
 	}
 
 	/* the volume type is further affected by a possible suffix */
-	suffix = strrchr(params->volname, '.');
+	suffix = strrchr(ctx->volname, '.');
 	if (suffix) {
 		if (strcmp(suffix, ".readonly") == 0) {
-			params->type = AFSVL_ROVOL;
-			params->force = true;
+			ctx->type = AFSVL_ROVOL;
+			ctx->force = true;
 		} else if (strcmp(suffix, ".backup") == 0) {
-			params->type = AFSVL_BACKVOL;
-			params->force = true;
+			ctx->type = AFSVL_BACKVOL;
+			ctx->force = true;
 		} else if (suffix[1] == 0) {
 		} else {
 			suffix = NULL;
 		}
 	}
 
-	params->volnamesz = suffix ?
-		suffix - params->volname : strlen(params->volname);
+	ctx->volnamesz = suffix ?
+		suffix - ctx->volname : strlen(ctx->volname);
 
 	_debug("cell %*.*s [%p]",
-	       cellnamesz, cellnamesz, cellname ?: "", params->cell);
+	       cellnamesz, cellnamesz, cellname ?: "", ctx->cell);
 
 	/* lookup the cell record */
-	if (cellname || !params->cell) {
-		cell = afs_cell_lookup(cellname, cellnamesz, true);
+	if (cellname) {
+		cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
+				       NULL, AFS_LOOKUP_CELL_DIRECT_MOUNT,
+				       afs_cell_trace_use_lookup_mount);
 		if (IS_ERR(cell)) {
-			printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
+			pr_err("kAFS: unable to lookup cell '%*.*s'\n",
 			       cellnamesz, cellnamesz, cellname ?: "");
 			return PTR_ERR(cell);
 		}
-		afs_put_cell(params->cell);
-		params->cell = cell;
+		afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse);
+		afs_see_cell(cell, afs_cell_trace_see_source);
+		ctx->cell = cell;
 	}
 
 	_debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
-	       params->cell->name, params->cell,
-	       params->volnamesz, params->volnamesz, params->volname,
-	       suffix ?: "-", params->type, params->force ? " FORCE" : "");
+	       ctx->cell->name, ctx->cell,
+	       ctx->volnamesz, ctx->volnamesz, ctx->volname,
+	       suffix ?: "-", ctx->type, ctx->force ? " FORCE" : "");
+
+	fc->source = param->string;
+	param->string = NULL;
+	return 0;
+}
+
+/*
+ * Parse a single mount parameter.
+ */
+static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct fs_parse_result result;
+	struct afs_fs_context *ctx = fc->fs_private;
+	int opt;
+
+	opt = fs_parse(fc, afs_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_source:
+		return afs_parse_source(fc, param);
+
+	case Opt_autocell:
+		ctx->autocell = true;
+		break;
+
+	case Opt_dyn:
+		ctx->dyn_root = true;
+		break;
+
+	case Opt_flock:
+		ctx->flock_mode = result.uint_32;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * Validate the options, get the cell key and look up the volume.
+ */
+static int afs_validate_fc(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_volume *volume;
+	struct afs_cell *cell;
+	struct key *key;
+	int ret;
+
+	if (!ctx->dyn_root) {
+		if (ctx->no_cell) {
+			pr_warn("kAFS: Can only specify source 'none' with -o dyn\n");
+			return -EINVAL;
+		}
+
+		if (!ctx->cell) {
+			pr_warn("kAFS: No cell specified\n");
+			return -EDESTADDRREQ;
+		}
+
+	reget_key:
+		/* We try to do the mount securely. */
+		key = afs_request_key(ctx->cell);
+		if (IS_ERR(key))
+			return PTR_ERR(key);
+
+		ctx->key = key;
+
+		if (ctx->volume) {
+			afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc);
+			ctx->volume = NULL;
+		}
+
+		if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) {
+			ret = afs_cell_detect_alias(ctx->cell, key);
+			if (ret < 0)
+				return ret;
+			if (ret == 1) {
+				_debug("switch to alias");
+				key_put(ctx->key);
+				ctx->key = NULL;
+				cell = afs_use_cell(ctx->cell->alias_of,
+						    afs_cell_trace_use_fc_alias);
+				afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
+				ctx->cell = cell;
+				goto reget_key;
+			}
+		}
+
+		volume = afs_create_volume(ctx);
+		if (IS_ERR(volume))
+			return PTR_ERR(volume);
+
+		ctx->volume = volume;
+		if (volume->type != AFSVL_RWVOL) {
+			ctx->flock_mode = afs_flock_mode_local;
+			fc->sb_flags |= SB_RDONLY;
+		}
+	}
 
 	return 0;
 }
@@ -287,58 +417,77 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 /*
  * check a superblock to see if it's the one we're looking for
  */
-static int afs_test_super(struct super_block *sb, void *data)
+static int afs_test_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct afs_super_info *as = AFS_FS_S(sb);
+
+	return (as->net_ns == fc->net_ns &&
+		as->volume &&
+		as->volume->vid == ctx->volume->vid &&
+		as->cell == ctx->cell &&
+		!as->dyn_root);
+}
+
+static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc)
 {
-	struct afs_super_info *as1 = data;
-	struct afs_super_info *as = sb->s_fs_info;
+	struct afs_super_info *as = AFS_FS_S(sb);
 
-	return as->volume == as1->volume;
+	return (as->net_ns == fc->net_ns &&
+		as->dyn_root);
 }
 
-static int afs_set_super(struct super_block *sb, void *data)
+static int afs_set_super(struct super_block *sb, struct fs_context *fc)
 {
-	sb->s_fs_info = data;
 	return set_anon_super(sb, NULL);
 }
 
 /*
  * fill in the superblock
  */
-static int afs_fill_super(struct super_block *sb,
-			  struct afs_mount_params *params)
+static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
 {
-	struct afs_super_info *as = sb->s_fs_info;
-	struct afs_fid fid;
+	struct afs_super_info *as = AFS_FS_S(sb);
 	struct inode *inode = NULL;
 	int ret;
 
 	_enter("");
 
 	/* fill in the superblock */
-	sb->s_blocksize		= PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
+	sb->s_maxbytes		= MAX_LFS_FILESIZE;
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
-	sb->s_bdi		= &as->volume->bdi;
-	strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
+	if (!as->dyn_root)
+		sb->s_xattr	= afs_xattr_handlers;
+	ret = super_setup_bdi(sb);
+	if (ret)
+		return ret;
 
 	/* allocate the root inode and dentry */
-	fid.vid		= as->volume->vid;
-	fid.vnode	= 1;
-	fid.unique	= 1;
-	inode = afs_iget(sb, params->key, &fid, NULL, NULL);
+	if (as->dyn_root) {
+		inode = afs_dynroot_iget_root(sb);
+	} else {
+		sprintf(sb->s_id, "%llu", as->volume->vid);
+		afs_activate_volume(as->volume);
+		inode = afs_root_iget(sb, ctx->key);
+	}
+
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	if (params->autocell)
-		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
-
 	ret = -ENOMEM;
 	sb->s_root = d_make_root(inode);
 	if (!sb->s_root)
 		goto error;
 
-	sb->s_d_op = &afs_fs_dentry_operations;
+	if (as->dyn_root) {
+		set_default_d_op(sb, &afs_dynroot_dentry_operations);
+	} else {
+		set_default_d_op(sb, &afs_fs_dentry_operations);
+		rcu_assign_pointer(as->volume->sb, sb);
+	}
 
 	_leave(" = 0");
 	return 0;
@@ -348,130 +497,171 @@ error:
 	return ret;
 }
 
-/*
- * get an AFS superblock
- */
-static struct dentry *afs_mount(struct file_system_type *fs_type,
-		      int flags, const char *dev_name, void *options)
+static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
 {
-	struct afs_mount_params params;
-	struct super_block *sb;
-	struct afs_volume *vol;
-	struct key *key;
-	char *new_opts = kstrdup(options, GFP_KERNEL);
+	struct afs_fs_context *ctx = fc->fs_private;
 	struct afs_super_info *as;
-	int ret;
 
-	_enter(",,%s,%p", dev_name, options);
+	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+	if (as) {
+		as->net_ns = get_net(fc->net_ns);
+		as->flock_mode = ctx->flock_mode;
+		if (ctx->dyn_root) {
+			as->dyn_root = true;
+		} else {
+			as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi);
+			as->volume = afs_get_volume(ctx->volume,
+						    afs_volume_trace_get_alloc_sbi);
+		}
+	}
+	return as;
+}
 
-	memset(&params, 0, sizeof(params));
+static void afs_destroy_sbi(struct afs_super_info *as)
+{
+	if (as) {
+		afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi);
+		afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi);
+		put_net(as->net_ns);
+		kfree(as);
+	}
+}
 
-	ret = -EINVAL;
-	if (current->nsproxy->net_ns != &init_net)
-		goto error;
+static void afs_kill_super(struct super_block *sb)
+{
+	struct afs_super_info *as = AFS_FS_S(sb);
 
-	/* parse the options and device name */
-	if (options) {
-		ret = afs_parse_options(&params, options, &dev_name);
-		if (ret < 0)
-			goto error;
-	}
+	/* Clear the callback interests (which will do ilookup5) before
+	 * deactivating the superblock.
+	 */
+	if (as->volume)
+		rcu_assign_pointer(as->volume->sb, NULL);
+	kill_anon_super(sb);
+	if (as->volume)
+		afs_deactivate_volume(as->volume);
+	afs_destroy_sbi(as);
+}
 
-	ret = afs_parse_device_name(&params, dev_name);
-	if (ret < 0)
-		goto error;
+/*
+ * Get an AFS superblock and root directory.
+ */
+static int afs_get_tree(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx = fc->fs_private;
+	struct super_block *sb;
+	struct afs_super_info *as;
+	int ret;
 
-	/* try and do the mount securely */
-	key = afs_request_key(params.cell);
-	if (IS_ERR(key)) {
-		_leave(" = %ld [key]", PTR_ERR(key));
-		ret = PTR_ERR(key);
+	ret = afs_validate_fc(fc);
+	if (ret)
 		goto error;
-	}
-	params.key = key;
 
-	/* parse the device name */
-	vol = afs_volume_lookup(&params);
-	if (IS_ERR(vol)) {
-		ret = PTR_ERR(vol);
-		goto error;
-	}
+	_enter("");
 
 	/* allocate a superblock info record */
-	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
-	if (!as) {
-		ret = -ENOMEM;
-		afs_put_volume(vol);
+	ret = -ENOMEM;
+	as = afs_alloc_sbi(fc);
+	if (!as)
 		goto error;
-	}
-	as->volume = vol;
+	fc->s_fs_info = as;
 
 	/* allocate a deviceless superblock */
-	sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
+	sb = sget_fc(fc,
+		     as->dyn_root ? afs_dynroot_test_super : afs_test_super,
+		     afs_set_super);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
-		afs_put_volume(vol);
-		kfree(as);
 		goto error;
 	}
 
 	if (!sb->s_root) {
 		/* initial superblock/root creation */
 		_debug("create");
-		ret = afs_fill_super(sb, &params);
-		if (ret < 0) {
-			deactivate_locked_super(sb);
-			goto error;
-		}
-		save_mount_options(sb, new_opts);
-		sb->s_flags |= MS_ACTIVE;
+		ret = afs_fill_super(sb, ctx);
+		if (ret < 0)
+			goto error_sb;
+		sb->s_flags |= SB_ACTIVE;
 	} else {
 		_debug("reuse");
-		ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
-		afs_put_volume(vol);
-		kfree(as);
+		ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
 	}
 
-	afs_put_cell(params.cell);
-	kfree(new_opts);
+	fc->root = dget(sb->s_root);
+	trace_afs_get_tree(as->cell, as->volume);
 	_leave(" = 0 [%p]", sb);
-	return dget(sb->s_root);
+	return 0;
 
+error_sb:
+	deactivate_locked_super(sb);
 error:
-	afs_put_cell(params.cell);
-	key_put(params.key);
-	kfree(new_opts);
 	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	return ret;
 }
 
-static void afs_kill_super(struct super_block *sb)
+static void afs_free_fc(struct fs_context *fc)
 {
-	struct afs_super_info *as = sb->s_fs_info;
-	kill_anon_super(sb);
-	afs_put_volume(as->volume);
-	kfree(as);
+	struct afs_fs_context *ctx = fc->fs_private;
+
+	afs_destroy_sbi(fc->s_fs_info);
+	afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc);
+	afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
+	key_put(ctx->key);
+	kfree(ctx);
 }
 
+static const struct fs_context_operations afs_context_ops = {
+	.free		= afs_free_fc,
+	.parse_param	= afs_parse_param,
+	.get_tree	= afs_get_tree,
+};
+
 /*
- * initialise an inode cache slab element prior to any use
+ * Set up the filesystem mount context.
+ */
+static int afs_init_fs_context(struct fs_context *fc)
+{
+	struct afs_fs_context *ctx;
+	struct afs_cell *cell;
+
+	ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->type = AFSVL_ROVOL;
+	ctx->net = afs_net(fc->net_ns);
+
+	/* Default to the workstation cell. */
+	cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc);
+	if (IS_ERR(cell))
+		cell = NULL;
+	ctx->cell = cell;
+
+	fc->fs_private = ctx;
+	fc->ops = &afs_context_ops;
+	return 0;
+}
+
+/*
+ * Initialise an inode cache slab element prior to any use.  Note that
+ * afs_alloc_inode() *must* reset anything that could incorrectly leak from one
+ * inode to another.
  */
 static void afs_i_init_once(void *_vnode)
 {
 	struct afs_vnode *vnode = _vnode;
 
 	memset(vnode, 0, sizeof(*vnode));
-	inode_init_once(&vnode->vfs_inode);
-	init_waitqueue_head(&vnode->update_waitq);
-	mutex_init(&vnode->permits_lock);
-	mutex_init(&vnode->validate_lock);
-	spin_lock_init(&vnode->writeback_lock);
+	inode_init_once(&vnode->netfs.inode);
+	INIT_LIST_HEAD(&vnode->io_lock_waiters);
+	init_rwsem(&vnode->validate_lock);
+	spin_lock_init(&vnode->wb_lock);
 	spin_lock_init(&vnode->lock);
-	INIT_LIST_HEAD(&vnode->writebacks);
+	INIT_LIST_HEAD(&vnode->wb_keys);
 	INIT_LIST_HEAD(&vnode->pending_locks);
 	INIT_LIST_HEAD(&vnode->granted_locks);
 	INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
-	INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
+	INIT_LIST_HEAD(&vnode->cb_mmap_link);
+	seqlock_init(&vnode->cb_lock);
 }
 
 /*
@@ -481,29 +671,36 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 {
 	struct afs_vnode *vnode;
 
-	vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
+	vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL);
 	if (!vnode)
 		return NULL;
 
 	atomic_inc(&afs_count_active_inodes);
 
+	/* Reset anything that shouldn't leak from one inode to the next. */
 	memset(&vnode->fid, 0, sizeof(vnode->fid));
 	memset(&vnode->status, 0, sizeof(vnode->status));
+	afs_vnode_set_cache(vnode, NULL);
 
 	vnode->volume		= NULL;
-	vnode->update_cnt	= 0;
+	vnode->lock_key		= NULL;
+	vnode->permit_cache	= NULL;
+	vnode->directory	= NULL;
+	vnode->directory_size	= 0;
+
 	vnode->flags		= 1 << AFS_VNODE_UNSET;
-	vnode->cb_promised	= false;
+	vnode->lock_state	= AFS_VNODE_LOCK_NONE;
+
+	init_rwsem(&vnode->rmdir_lock);
+	INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work);
 
-	_leave(" = %p", &vnode->vfs_inode);
-	return &vnode->vfs_inode;
+	_leave(" = %p", &vnode->netfs.inode);
+	return &vnode->netfs.inode;
 }
 
-static void afs_i_callback(struct rcu_head *head)
+static void afs_free_inode(struct inode *inode)
 {
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-	struct afs_vnode *vnode = AFS_FS_I(inode);
-	kmem_cache_free(afs_inode_cachep, vnode);
+	kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
 }
 
 /*
@@ -513,45 +710,61 @@ static void afs_destroy_inode(struct inode *inode)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	_enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
+	_enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
 
 	_debug("DESTROY INODE %p", inode);
 
-	ASSERTCMP(vnode->server, ==, NULL);
-
-	call_rcu(&inode->i_rcu, afs_i_callback);
 	atomic_dec(&afs_count_active_inodes);
 }
 
+static void afs_get_volume_status_success(struct afs_operation *op)
+{
+	struct afs_volume_status *vs = &op->volstatus.vs;
+	struct kstatfs *buf = op->volstatus.buf;
+
+	if (vs->max_quota == 0)
+		buf->f_blocks = vs->part_max_blocks;
+	else
+		buf->f_blocks = vs->max_quota;
+
+	if (buf->f_blocks > vs->blocks_in_use)
+		buf->f_bavail = buf->f_bfree =
+			buf->f_blocks - vs->blocks_in_use;
+}
+
+static const struct afs_operation_ops afs_get_volume_status_operation = {
+	.issue_afs_rpc	= afs_fs_get_volume_status,
+	.issue_yfs_rpc	= yfs_fs_get_volume_status,
+	.success	= afs_get_volume_status_success,
+};
+
 /*
  * return information about an AFS volume
  */
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct afs_volume_status vs;
-	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
-	struct key *key;
-	int ret;
-
-	key = afs_request_key(vnode->volume->cell);
-	if (IS_ERR(key))
-		return PTR_ERR(key);
-
-	ret = afs_vnode_get_volume_status(vnode, key, &vs);
-	key_put(key);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
+	struct afs_super_info *as = AFS_FS_S(dentry->d_sb);
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
 	buf->f_type	= dentry->d_sb->s_magic;
 	buf->f_bsize	= AFS_BLOCK_SIZE;
 	buf->f_namelen	= AFSNAMEMAX - 1;
 
-	if (vs.max_quota == 0)
-		buf->f_blocks = vs.part_max_blocks;
-	else
-		buf->f_blocks = vs.max_quota;
-	buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
-	return 0;
+	if (as->dyn_root) {
+		buf->f_blocks	= 1;
+		buf->f_bavail	= 0;
+		buf->f_bfree	= 0;
+		return 0;
+	}
+
+	op = afs_alloc_operation(NULL, as->volume);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->nr_files		= 1;
+	op->volstatus.buf	= buf;
+	op->ops			= &afs_get_volume_status_operation;
+	return afs_do_sync_operation(op);
 }
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
new file mode 100644
index 000000000000..0ba8336c9025
--- /dev/null
+++ b/fs/afs/validation.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* vnode and volume validity verification.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+/*
+ * Data validation is managed through a number of mechanisms from the server:
+ *
+ *  (1) On first contact with a server (such as if it has just been rebooted),
+ *      the server sends us a CB.InitCallBackState* request.
+ *
+ *  (2) On a RW volume, in response to certain vnode (inode)-accessing RPC
+ *      calls, the server maintains a time-limited per-vnode promise that it
+ *      will send us a CB.CallBack request if a third party alters the vnodes
+ *      accessed.
+ *
+ *      Note that a vnode-level callbacks may also be sent for other reasons,
+ *      such as filelock release.
+ *
+ *  (3) On a RO (or Backup) volume, in response to certain vnode-accessing RPC
+ *      calls, each server maintains a time-limited per-volume promise that it
+ *      will send us a CB.CallBack request if the RO volume is updated to a
+ *      snapshot of the RW volume ("vos release").  This is an atomic event
+ *      that cuts over all instances of the RO volume across multiple servers
+ *      simultaneously.
+ *
+ *	Note that a volume-level callbacks may also be sent for other reasons,
+ *	such as the volumeserver taking over control of the volume from the
+ *	fileserver.
+ *
+ *	Note also that each server maintains an independent time limit on an
+ *	independent callback.
+ *
+ *  (4) Certain RPC calls include a volume information record "VolSync" in
+ *      their reply.  This contains a creation date for the volume that should
+ *      remain unchanged for a RW volume (but will be changed if the volume is
+ *      restored from backup) or will be bumped to the time of snapshotting
+ *      when a RO volume is released.
+ *
+ * In order to track this events, the following are provided:
+ *
+ *	->cb_v_break.  A counter of events that might mean that the contents of
+ *	a volume have been altered since we last checked a vnode.
+ *
+ *	->cb_v_check.  A counter of the number of events that we've sent a
+ *	query to the server for.  Everything's up to date if this equals
+ *	cb_v_break.
+ *
+ *	->cb_scrub.  A counter of the number of regression events for which we
+ *	have to completely wipe the cache.
+ *
+ *	->cb_ro_snapshot.  A counter of the number of times that we've
+ *      recognised that a RO volume has been updated.
+ *
+ *	->cb_break.  A counter of events that might mean that the contents of a
+ *      vnode have been altered.
+ *
+ *	->cb_expires_at.  The time at which the callback promise expires or
+ *      AFS_NO_CB_PROMISE if we have no promise.
+ *
+ * The way we manage things is:
+ *
+ *  (1) When a volume-level CB.CallBack occurs, we increment ->cb_v_break on
+ *      the volume and reset ->cb_expires_at (ie. set AFS_NO_CB_PROMISE) on the
+ *      volume and volume's server record.
+ *
+ *  (2) When a CB.InitCallBackState occurs, we treat this as a volume-level
+ *	callback break on all the volumes that have been using that volume
+ *	(ie. increment ->cb_v_break and reset ->cb_expires_at).
+ *
+ *  (3) When a vnode-level CB.CallBack occurs, we increment ->cb_break on the
+ *	vnode and reset its ->cb_expires_at.  If the vnode is mmapped, we also
+ *	dispatch a work item to unmap all PTEs to the vnode's pagecache to
+ *	force reentry to the filesystem for revalidation.
+ *
+ *  (4) When entering the filesystem, we call afs_validate() to check the
+ *	validity of a vnode.  This first checks to see if ->cb_v_check and
+ *	->cb_v_break match, and if they don't, we lock volume->cb_check_lock
+ *	exclusively and perform an FS.FetchStatus on the vnode.
+ *
+ *	After checking the volume, we check the vnode.  If there's a mismatch
+ *	between the volume counters and the vnode's mirrors of those counters,
+ *	we lock vnode->validate_lock and issue an FS.FetchStatus on the vnode.
+ *
+ *  (5) When the reply from FS.FetchStatus arrives, the VolSync record is
+ *      parsed:
+ *
+ *	(A) If the Creation timestamp has changed on a RW volume or regressed
+ *	    on a RO volume, we try to increment ->cb_scrub; if it advances on a
+ *	    RO volume, we assume "vos release" happened and try to increment
+ *	    ->cb_ro_snapshot.
+ *
+ *      (B) If the Update timestamp has regressed, we try to increment
+ *	    ->cb_scrub.
+ *
+ *      Note that in both of these cases, we only do the increment if we can
+ *      cmpxchg the value of the timestamp from the value we noted before the
+ *      op.  This tries to prevent parallel ops from fighting one another.
+ *
+ *	volume->cb_v_check is then set to ->cb_v_break.
+ *
+ *  (6) The AFSCallBack record included in the FS.FetchStatus reply is also
+ *	parsed and used to set the promise in ->cb_expires_at for the vnode,
+ *	the volume and the volume's server record.
+ *
+ *  (7) If ->cb_scrub is seen to have advanced, we invalidate the pagecache for
+ *      the vnode.
+ */
+
+/*
+ * Check the validity of a vnode/inode and its parent volume.
+ */
+bool afs_check_validity(const struct afs_vnode *vnode)
+{
+	const struct afs_volume *volume = vnode->volume;
+	enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace;
+	time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at);
+	time64_t deadline = ktime_get_real_seconds() + 10;
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		return true;
+
+	if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break))
+		trace = afs_vnode_invalid_trace_cb_v_break;
+	else if (cb_expires_at == AFS_NO_CB_PROMISE)
+		trace = afs_vnode_invalid_trace_no_cb_promise;
+	else if (cb_expires_at <= deadline)
+		trace = afs_vnode_invalid_trace_expired;
+	else if (volume->cb_expires_at <= deadline)
+		trace = afs_vnode_invalid_trace_vol_expired;
+	else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot))
+		trace = afs_vnode_invalid_trace_cb_ro_snapshot;
+	else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub))
+		trace = afs_vnode_invalid_trace_cb_scrub;
+	else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+		trace = afs_vnode_invalid_trace_zap_data;
+	else
+		return true;
+	trace_afs_vnode_invalid(vnode, trace);
+	return false;
+}
+
+/*
+ * See if the server we've just talked to is currently excluded.
+ */
+static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+	const struct afs_server_entry *se;
+	const struct afs_server_list *slist;
+	bool is_excluded = true;
+	int i;
+
+	rcu_read_lock();
+
+	slist = rcu_dereference(volume->servers);
+	for (i = 0; i < slist->nr_servers; i++) {
+		se = &slist->servers[i];
+		if (op->server == se->server) {
+			is_excluded = test_bit(AFS_SE_EXCLUDED, &se->flags);
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return is_excluded;
+}
+
+/*
+ * Update the volume's server list when the creation time changes and see if
+ * the server we've just talked to is currently excluded.
+ */
+static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume)
+{
+	int ret;
+
+	if (__afs_is_server_excluded(op, volume))
+		return 1;
+
+	set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+	ret = afs_check_volume_status(op->volume, op);
+	if (ret < 0)
+		return ret;
+
+	return __afs_is_server_excluded(op, volume);
+}
+
+/*
+ * Handle a change to the volume creation time in the VolSync record.
+ */
+static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume)
+{
+	unsigned int snap;
+	time64_t cur = volume->creation_time;
+	time64_t old = op->pre_volsync.creation;
+	time64_t new = op->volsync.creation;
+	int ret;
+
+	_enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+	if (cur == TIME64_MIN) {
+		volume->creation_time = new;
+		return 0;
+	}
+
+	if (new == cur)
+		return 0;
+
+	/* Try to advance the creation timestamp from what we had before the
+	 * operation to what we got back from the server.  This should
+	 * hopefully ensure that in a race between multiple operations only one
+	 * of them will do this.
+	 */
+	if (cur != old)
+		return 0;
+
+	/* If the creation time changes in an unexpected way, we need to scrub
+	 * our caches.  For a RW vol, this will only change if the volume is
+	 * restored from a backup; for a RO/Backup vol, this will advance when
+	 * the volume is updated to a new snapshot (eg. "vos release").
+	 */
+	if (volume->type == AFSVL_RWVOL)
+		goto regressed;
+	if (volume->type == AFSVL_BACKVOL) {
+		if (new < old)
+			goto regressed;
+		goto advance;
+	}
+
+	/* We have an RO volume, we need to query the VL server and look at the
+	 * server flags to see if RW->RO replication is in progress.
+	 */
+	ret = afs_is_server_excluded(op, volume);
+	if (ret < 0)
+		return ret;
+	if (ret > 0) {
+		snap = atomic_read(&volume->cb_ro_snapshot);
+		trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_volume_excluded);
+		return ret;
+	}
+
+advance:
+	snap = atomic_inc_return(&volume->cb_ro_snapshot);
+	trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_for_vos_release);
+	volume->creation_time = new;
+	return 0;
+
+regressed:
+	atomic_inc(&volume->cb_scrub);
+	trace_afs_cb_v_break(volume->vid, 0, afs_cb_break_for_creation_regress);
+	volume->creation_time = new;
+	return 0;
+}
+
+/*
+ * Handle a change to the volume update time in the VolSync record.
+ */
+static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume)
+{
+	enum afs_cb_break_reason reason = afs_cb_break_no_break;
+	time64_t cur = volume->update_time;
+	time64_t old = op->pre_volsync.update;
+	time64_t new = op->volsync.update;
+
+	_enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new);
+
+	if (cur == TIME64_MIN) {
+		volume->update_time = new;
+		return;
+	}
+
+	if (new == cur)
+		return;
+
+	/* If the volume update time changes in an unexpected way, we need to
+	 * scrub our caches.  For a RW vol, this will advance on every
+	 * modification op; for a RO/Backup vol, this will advance when the
+	 * volume is updated to a new snapshot (eg. "vos release").
+	 */
+	if (new < old)
+		reason = afs_cb_break_for_update_regress;
+
+	/* Try to advance the update timestamp from what we had before the
+	 * operation to what we got back from the server.  This should
+	 * hopefully ensure that in a race between multiple operations only one
+	 * of them will do this.
+	 */
+	if (cur == old) {
+		if (reason == afs_cb_break_for_update_regress) {
+			atomic_inc(&volume->cb_scrub);
+			trace_afs_cb_v_break(volume->vid, 0, reason);
+		}
+		volume->update_time = new;
+	}
+}
+
+static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume)
+{
+	int ret = 0;
+
+	if (likely(op->volsync.creation == volume->creation_time &&
+		   op->volsync.update == volume->update_time))
+		return 0;
+
+	mutex_lock(&volume->volsync_lock);
+	if (op->volsync.creation != volume->creation_time) {
+		ret = afs_update_volume_creation_time(op, volume);
+		if (ret < 0)
+			goto out;
+	}
+	if (op->volsync.update != volume->update_time)
+		afs_update_volume_update_time(op, volume);
+out:
+	mutex_unlock(&volume->volsync_lock);
+	return ret;
+}
+
+/*
+ * Update the state of a volume, including recording the expiration time of the
+ * callback promise.  Returns 1 to redo the operation from the start.
+ */
+int afs_update_volume_state(struct afs_operation *op)
+{
+	struct afs_server_list *slist = op->server_list;
+	struct afs_server_entry *se = &slist->servers[op->server_index];
+	struct afs_callback *cb = &op->file[0].scb.callback;
+	struct afs_volume *volume = op->volume;
+	unsigned int cb_v_break = atomic_read(&volume->cb_v_break);
+	unsigned int cb_v_check = atomic_read(&volume->cb_v_check);
+	int ret;
+
+	_enter("%llx", op->volume->vid);
+
+	if (op->volsync.creation != TIME64_MIN || op->volsync.update != TIME64_MIN) {
+		ret = afs_update_volume_times(op, volume);
+		if (ret != 0) {
+			_leave(" = %d", ret);
+			return ret;
+		}
+	}
+
+	if (op->cb_v_break == cb_v_break &&
+	    (op->file[0].scb.have_cb || op->file[1].scb.have_cb)) {
+		time64_t expires_at = cb->expires_at;
+
+		if (!op->file[0].scb.have_cb)
+			expires_at = op->file[1].scb.callback.expires_at;
+
+		se->cb_expires_at = expires_at;
+		volume->cb_expires_at = expires_at;
+	}
+	if (cb_v_check < op->cb_v_break)
+		atomic_cmpxchg(&volume->cb_v_check, cb_v_check, op->cb_v_break);
+	return 0;
+}
+
+/*
+ * mark the data attached to an inode as obsolete due to a write on the server
+ * - might also want to ditch all the outstanding writes and dirty pages
+ */
+static void afs_zap_data(struct afs_vnode *vnode)
+{
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+	afs_invalidate_cache(vnode, 0);
+
+	/* nuke all the non-dirty pages that aren't locked, mapped or being
+	 * written back in a regular file and completely discard the pages in a
+	 * directory or symlink */
+	if (S_ISREG(vnode->netfs.inode.i_mode))
+		filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX);
+	else
+		filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX);
+}
+
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ *     symlink)
+ *   - parent dir metadata changed (security changes)
+ *   - dentry data changed (write, truncate)
+ *   - dentry metadata changed (security changes)
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+	struct afs_volume *volume = vnode->volume;
+	unsigned int cb_ro_snapshot, cb_scrub;
+	time64_t deadline = ktime_get_real_seconds() + 10;
+	bool zap = false, locked_vol = false;
+	int ret;
+
+	_enter("{v={%llx:%llu} fl=%lx},%x",
+	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+	       key_serial(key));
+
+	if (afs_check_validity(vnode))
+		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
+
+	ret = down_write_killable(&vnode->validate_lock);
+	if (ret < 0)
+		goto error;
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		ret = -ESTALE;
+		goto error_unlock;
+	}
+
+	/* Validate a volume after the v_break has changed or the volume
+	 * callback expired.  We only want to do this once per volume per
+	 * v_break change.  The actual work will be done when parsing the
+	 * status fetch reply.
+	 */
+	if (volume->cb_expires_at <= deadline ||
+	    atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break)) {
+		ret = mutex_lock_interruptible(&volume->cb_check_lock);
+		if (ret < 0)
+			goto error_unlock;
+		locked_vol = true;
+	}
+
+	cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+	cb_scrub = atomic_read(&volume->cb_scrub);
+	if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+	    vnode->cb_scrub	  != cb_scrub)
+		unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
+
+	if (vnode->cb_ro_snapshot != cb_ro_snapshot ||
+	    vnode->cb_scrub	  != cb_scrub ||
+	    volume->cb_expires_at <= deadline ||
+	    atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
+	    atomic64_read(&vnode->cb_expires_at) <= deadline
+	    ) {
+		ret = afs_fetch_status(vnode, key, false, NULL);
+		if (ret < 0) {
+			if (ret == -ENOENT) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				ret = -ESTALE;
+			}
+			goto error_unlock;
+		}
+
+		_debug("new promise [fl=%lx]", vnode->flags);
+	}
+
+	/* We can drop the volume lock now as. */
+	if (locked_vol) {
+		mutex_unlock(&volume->cb_check_lock);
+		locked_vol = false;
+	}
+
+	cb_ro_snapshot = atomic_read(&volume->cb_ro_snapshot);
+	cb_scrub = atomic_read(&volume->cb_scrub);
+	_debug("vnode inval %x==%x %x==%x",
+	       vnode->cb_ro_snapshot, cb_ro_snapshot,
+	       vnode->cb_scrub, cb_scrub);
+	if (vnode->cb_scrub != cb_scrub)
+		zap = true;
+	vnode->cb_ro_snapshot = cb_ro_snapshot;
+	vnode->cb_scrub = cb_scrub;
+
+	/* if the vnode's data version number changed then its contents are
+	 * different */
+	zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+	if (zap)
+		afs_zap_data(vnode);
+	up_write(&vnode->validate_lock);
+	_leave(" = 0");
+	return 0;
+
+error_unlock:
+	if (locked_vol)
+		mutex_unlock(&volume->cb_check_lock);
+	up_write(&vnode->validate_lock);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c
new file mode 100644
index 000000000000..fc9676abd252
--- /dev/null
+++ b/fs/afs/vl_alias.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS cell alias detection
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+/*
+ * Sample a volume.
+ */
+static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key,
+					    const char *name, unsigned int namelen)
+{
+	struct afs_volume *volume;
+	struct afs_fs_context fc = {
+		.type		= 0, /* Explicitly leave it to the VLDB */
+		.volnamesz	= namelen,
+		.volname	= name,
+		.net		= cell->net,
+		.cell		= cell,
+		.key		= key, /* This might need to be something */
+	};
+
+	volume = afs_create_volume(&fc);
+	_leave(" = %p", volume);
+	return volume;
+}
+
+/*
+ * Compare the address lists of a pair of fileservers.
+ */
+static int afs_compare_fs_alists(const struct afs_server *server_a,
+				 const struct afs_server *server_b)
+{
+	const struct afs_addr_list *la, *lb;
+	int a = 0, b = 0, addr_matches = 0;
+
+	la = rcu_dereference(server_a->endpoint_state)->addresses;
+	lb = rcu_dereference(server_b->endpoint_state)->addresses;
+
+	while (a < la->nr_addrs && b < lb->nr_addrs) {
+		unsigned long pa = (unsigned long)la->addrs[a].peer;
+		unsigned long pb = (unsigned long)lb->addrs[b].peer;
+		long diff = pa - pb;
+
+		if (diff < 0) {
+			a++;
+		} else if (diff > 0) {
+			b++;
+		} else {
+			addr_matches++;
+			a++;
+			b++;
+		}
+	}
+
+	return addr_matches;
+}
+
+/*
+ * Compare the fileserver lists of two volumes.  The server lists are sorted in
+ * order of ascending UUID.
+ */
+static int afs_compare_volume_slists(const struct afs_volume *vol_a,
+				     const struct afs_volume *vol_b)
+{
+	const struct afs_server_list *la, *lb;
+	int i, a = 0, b = 0, uuid_matches = 0, addr_matches = 0;
+
+	la = rcu_dereference(vol_a->servers);
+	lb = rcu_dereference(vol_b->servers);
+
+	for (i = 0; i < AFS_MAXTYPES; i++)
+		if (vol_a->vids[i] != vol_b->vids[i])
+			return 0;
+
+	while (a < la->nr_servers && b < lb->nr_servers) {
+		const struct afs_server *server_a = la->servers[a].server;
+		const struct afs_server *server_b = lb->servers[b].server;
+		int diff = memcmp(&server_a->uuid, &server_b->uuid, sizeof(uuid_t));
+
+		if (diff < 0) {
+			a++;
+		} else if (diff > 0) {
+			b++;
+		} else {
+			uuid_matches++;
+			addr_matches += afs_compare_fs_alists(server_a, server_b);
+			a++;
+			b++;
+		}
+	}
+
+	_leave(" = %d [um %d]", addr_matches, uuid_matches);
+	return addr_matches;
+}
+
+/*
+ * Compare root.cell volumes.
+ */
+static int afs_compare_cell_roots(struct afs_cell *cell)
+{
+	struct afs_cell *p;
+
+	_enter("");
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) {
+		if (p == cell || p->alias_of)
+			continue;
+		if (!p->root_volume)
+			continue; /* Ignore cells that don't have a root.cell volume. */
+
+		if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0)
+			goto is_alias;
+	}
+
+	rcu_read_unlock();
+	_leave(" = 0");
+	return 0;
+
+is_alias:
+	rcu_read_unlock();
+	cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias);
+	return 1;
+}
+
+/*
+ * Query the new cell for a volume from a cell we're already using.
+ */
+static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
+				   struct afs_cell *p)
+{
+	struct afs_volume *volume, *pvol = NULL;
+	int ret;
+
+	/* Arbitrarily pick a volume from the list. */
+	read_seqlock_excl(&p->volume_lock);
+	if (!RB_EMPTY_ROOT(&p->volumes))
+		pvol = afs_get_volume(rb_entry(p->volumes.rb_node,
+					       struct afs_volume, cell_node),
+				      afs_volume_trace_get_query_alias);
+	read_sequnlock_excl(&p->volume_lock);
+	if (!pvol)
+		return 0;
+
+	_enter("%s:%s", cell->name, pvol->name);
+
+	/* And see if it's in the new cell. */
+	volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len);
+	if (IS_ERR(volume)) {
+		afs_put_volume(pvol, afs_volume_trace_put_query_alias);
+		if (PTR_ERR(volume) != -ENOMEDIUM)
+			return PTR_ERR(volume);
+		/* That volume is not in the new cell, so not an alias */
+		return 0;
+	}
+
+	/* The new cell has a like-named volume also - compare volume ID,
+	 * server and address lists.
+	 */
+	ret = 0;
+	if (pvol->vid == volume->vid) {
+		rcu_read_lock();
+		if (afs_compare_volume_slists(volume, pvol))
+			ret = 1;
+		rcu_read_unlock();
+	}
+
+	afs_put_volume(volume, afs_volume_trace_put_query_alias);
+	afs_put_volume(pvol, afs_volume_trace_put_query_alias);
+	return ret;
+}
+
+/*
+ * Query the new cell for volumes we know exist in cells we're already using.
+ */
+static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_cell *p;
+
+	_enter("%s", cell->name);
+
+	if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0)
+		return -ERESTARTSYS;
+
+	hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) {
+		if (p == cell || p->alias_of)
+			continue;
+		if (RB_EMPTY_ROOT(&p->volumes))
+			continue;
+		if (p->root_volume)
+			continue; /* Ignore cells that have a root.cell volume. */
+		afs_use_cell(p, afs_cell_trace_use_check_alias);
+		mutex_unlock(&cell->net->proc_cells_lock);
+
+		if (afs_query_for_alias_one(cell, key, p) != 0)
+			goto is_alias;
+
+		if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
+			afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
+			return -ERESTARTSYS;
+		}
+
+		afs_unuse_cell(p, afs_cell_trace_unuse_check_alias);
+	}
+
+	mutex_unlock(&cell->net->proc_cells_lock);
+	_leave(" = 0");
+	return 0;
+
+is_alias:
+	cell->alias_of = p; /* Transfer our ref */
+	return 1;
+}
+
+/*
+ * Look up a VLDB record for a volume.
+ */
+static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
+{
+	struct afs_vl_cursor vc;
+	char *cell_name = ERR_PTR(-EDESTADDRREQ);
+	bool skipped = false, not_skipped = false;
+	int ret;
+
+	if (!afs_begin_vlserver_operation(&vc, cell, key))
+		return ERR_PTR(-ERESTARTSYS);
+
+	while (afs_select_vlserver(&vc)) {
+		if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
+			vc.call_error = -EOPNOTSUPP;
+			skipped = true;
+			continue;
+		}
+		not_skipped = true;
+		cell_name = afs_yfsvl_get_cell_name(&vc);
+	}
+
+	ret = afs_end_vlserver_operation(&vc);
+	if (skipped && !not_skipped)
+		ret = -EOPNOTSUPP;
+	return ret < 0 ? ERR_PTR(ret) : cell_name;
+}
+
+static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
+{
+	struct afs_cell *master;
+	size_t name_len;
+	char *cell_name;
+
+	cell_name = afs_vl_get_cell_name(cell, key);
+	if (IS_ERR(cell_name))
+		return PTR_ERR(cell_name);
+
+	if (strcmp(cell_name, cell->name) == 0) {
+		kfree(cell_name);
+		return 0;
+	}
+
+	name_len = strlen(cell_name);
+	if (!name_len || name_len > AFS_MAXCELLNAME)
+		master = ERR_PTR(-EOPNOTSUPP);
+	else
+		master = afs_lookup_cell(cell->net, cell_name, name_len, NULL,
+					 AFS_LOOKUP_CELL_ALIAS_CHECK,
+					 afs_cell_trace_use_lookup_canonical);
+	kfree(cell_name);
+	if (IS_ERR(master))
+		return PTR_ERR(master);
+
+	cell->alias_of = master; /* Transfer our ref */
+	return 1;
+}
+
+static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_volume *root_volume;
+	int ret;
+
+	_enter("%s", cell->name);
+
+	ret = yfs_check_canonical_cell_name(cell, key);
+	if (ret != -EOPNOTSUPP)
+		return ret;
+
+	/* Try and get the root.cell volume for comparison with other cells */
+	root_volume = afs_sample_volume(cell, key, "root.cell", 9);
+	if (!IS_ERR(root_volume)) {
+		cell->root_volume = root_volume;
+		return afs_compare_cell_roots(cell);
+	}
+
+	if (PTR_ERR(root_volume) != -ENOMEDIUM)
+		return PTR_ERR(root_volume);
+
+	/* Okay, this cell doesn't have an root.cell volume.  We need to
+	 * locate some other random volume and use that to check.
+	 */
+	return afs_query_for_alias(cell, key);
+}
+
+/*
+ * Check to see if a new cell is an alias of a cell we already have.  At this
+ * point we have the cell's volume server list.
+ *
+ * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error
+ * if we had problems gathering the data required.  In the case the we did
+ * detect an alias, cell->alias_of is set to point to the assumed master.
+ */
+int afs_cell_detect_alias(struct afs_cell *cell, struct key *key)
+{
+	struct afs_net *net = cell->net;
+	int ret;
+
+	if (mutex_lock_interruptible(&net->cells_alias_lock) < 0)
+		return -ERESTARTSYS;
+
+	if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) {
+		ret = afs_do_cell_detect_alias(cell, key);
+		if (ret >= 0)
+			clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags);
+	} else {
+		ret = cell->alias_of ? 1 : 0;
+	}
+
+	mutex_unlock(&net->cells_alias_lock);
+
+	if (ret == 1)
+		pr_notice("kAFS: Cell %s is an alias of %s\n",
+			  cell->name, cell->alias_of->name);
+	return ret;
+}
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644
index 000000000000..9b1c20daac53
--- /dev/null
+++ b/fs/afs/vl_list.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+					unsigned short port)
+{
+	struct afs_vlserver *vlserver;
+	static atomic_t debug_ids;
+
+	vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
+			   GFP_KERNEL);
+	if (vlserver) {
+		refcount_set(&vlserver->ref, 1);
+		rwlock_init(&vlserver->lock);
+		init_waitqueue_head(&vlserver->probe_wq);
+		spin_lock_init(&vlserver->probe_lock);
+		vlserver->debug_id = atomic_inc_return(&debug_ids);
+		vlserver->rtt = UINT_MAX;
+		vlserver->name_len = name_len;
+		vlserver->service_id = VL_SERVICE;
+		vlserver->port = port;
+		memcpy(vlserver->name, name, name_len);
+	}
+	return vlserver;
+}
+
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+	struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
+
+	afs_put_addrlist(rcu_access_pointer(vlserver->addresses),
+			 afs_alist_trace_put_vlserver);
+	kfree_rcu(vlserver, rcu);
+}
+
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+	if (vlserver &&
+	    refcount_dec_and_test(&vlserver->ref))
+		call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+}
+
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+	struct afs_vlserver_list *vllist;
+
+	vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
+	if (vllist) {
+		refcount_set(&vllist->ref, 1);
+		rwlock_init(&vllist->lock);
+	}
+
+	return vllist;
+}
+
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+	if (vllist) {
+		if (refcount_dec_and_test(&vllist->ref)) {
+			int i;
+
+			for (i = 0; i < vllist->nr_servers; i++) {
+				afs_put_vlserver(net, vllist->servers[i].server);
+			}
+			kfree_rcu(vllist, rcu);
+		}
+	}
+}
+
+static u16 afs_extract_le16(const u8 **_b)
+{
+	u16 val;
+
+	val  = (u16)*(*_b)++ << 0;
+	val |= (u16)*(*_b)++ << 8;
+	return val;
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(struct afs_net *net,
+						  const u8 **_b, const u8 *end,
+						  u8 nr_addrs, u16 port)
+{
+	struct afs_addr_list *alist;
+	const u8 *b = *_b;
+	int ret = -EINVAL;
+
+	alist = afs_alloc_addrlist(nr_addrs);
+	if (!alist)
+		return ERR_PTR(-ENOMEM);
+	if (nr_addrs == 0)
+		return alist;
+
+	for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+		struct dns_server_list_v1_address hdr;
+		__be32 x[4];
+
+		hdr.address_type = *b++;
+
+		switch (hdr.address_type) {
+		case DNS_ADDRESS_IS_IPV4:
+			if (end - b < 4) {
+				_leave(" = -EINVAL [short inet]");
+				goto error;
+			}
+			memcpy(x, b, 4);
+			ret = afs_merge_fs_addr4(net, alist, x[0], port);
+			if (ret < 0)
+				goto error;
+			b += 4;
+			break;
+
+		case DNS_ADDRESS_IS_IPV6:
+			if (end - b < 16) {
+				_leave(" = -EINVAL [short inet6]");
+				goto error;
+			}
+			memcpy(x, b, 16);
+			ret = afs_merge_fs_addr6(net, alist, x, port);
+			if (ret < 0)
+				goto error;
+			b += 16;
+			break;
+
+		default:
+			_leave(" = -EADDRNOTAVAIL [unknown af %u]",
+			       hdr.address_type);
+			ret = -EADDRNOTAVAIL;
+			goto error;
+		}
+	}
+
+	/* Start with IPv6 if available. */
+	if (alist->nr_ipv4 < alist->nr_addrs)
+		alist->preferred = alist->nr_ipv4;
+
+	*_b = b;
+	return alist;
+
+error:
+	*_b = b;
+	afs_put_addrlist(alist, afs_alist_trace_put_parse_error);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+						    const void *buffer,
+						    size_t buffer_size)
+{
+	const struct dns_server_list_v1_header *hdr = buffer;
+	struct dns_server_list_v1_server bs;
+	struct afs_vlserver_list *vllist, *previous;
+	struct afs_addr_list *addrs;
+	struct afs_vlserver *server;
+	const u8 *b = buffer, *end = buffer + buffer_size;
+	int ret = -ENOMEM, nr_servers, i, j;
+
+	_enter("");
+
+	/* Check that it's a server list, v1 */
+	if (end - b < sizeof(*hdr) ||
+	    hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
+	    hdr->hdr.version != 1) {
+		pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
+			  hdr->hdr.content, hdr->hdr.version, end - b);
+		ret = -EDESTADDRREQ;
+		goto dump;
+	}
+
+	nr_servers = hdr->nr_servers;
+
+	vllist = afs_alloc_vlserver_list(nr_servers);
+	if (!vllist)
+		return ERR_PTR(-ENOMEM);
+
+	vllist->source = (hdr->source < NR__dns_record_source) ?
+		hdr->source : NR__dns_record_source;
+	vllist->status = (hdr->status < NR__dns_lookup_status) ?
+		hdr->status : NR__dns_lookup_status;
+
+	read_lock(&cell->vl_servers_lock);
+	previous = afs_get_vlserverlist(
+		rcu_dereference_protected(cell->vl_servers,
+					  lockdep_is_held(&cell->vl_servers_lock)));
+	read_unlock(&cell->vl_servers_lock);
+
+	b += sizeof(*hdr);
+	while (end - b >= sizeof(bs)) {
+		bs.name_len	= afs_extract_le16(&b);
+		bs.priority	= afs_extract_le16(&b);
+		bs.weight	= afs_extract_le16(&b);
+		bs.port		= afs_extract_le16(&b);
+		bs.source	= *b++;
+		bs.status	= *b++;
+		bs.protocol	= *b++;
+		bs.nr_addrs	= *b++;
+
+		_debug("extract %u %u %u %u %u %u %*.*s",
+		       bs.name_len, bs.priority, bs.weight,
+		       bs.port, bs.protocol, bs.nr_addrs,
+		       bs.name_len, bs.name_len, b);
+
+		if (end - b < bs.name_len)
+			break;
+
+		ret = -EPROTONOSUPPORT;
+		if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
+			bs.protocol = DNS_SERVER_PROTOCOL_UDP;
+		} else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
+			_leave(" = [proto %u]", bs.protocol);
+			goto error;
+		}
+
+		if (bs.port == 0)
+			bs.port = AFS_VL_PORT;
+		if (bs.source > NR__dns_record_source)
+			bs.source = NR__dns_record_source;
+		if (bs.status > NR__dns_lookup_status)
+			bs.status = NR__dns_lookup_status;
+
+		/* See if we can update an old server record */
+		server = NULL;
+		for (i = 0; i < previous->nr_servers; i++) {
+			struct afs_vlserver *p = previous->servers[i].server;
+
+			if (p->name_len == bs.name_len &&
+			    p->port == bs.port &&
+			    strncasecmp(b, p->name, bs.name_len) == 0) {
+				server = afs_get_vlserver(p);
+				break;
+			}
+		}
+
+		if (!server) {
+			ret = -ENOMEM;
+			server = afs_alloc_vlserver(b, bs.name_len, bs.port);
+			if (!server)
+				goto error;
+		}
+
+		b += bs.name_len;
+
+		/* Extract the addresses - note that we can't skip this as we
+		 * have to advance the payload pointer.
+		 */
+		addrs = afs_extract_vl_addrs(cell->net, &b, end, bs.nr_addrs, bs.port);
+		if (IS_ERR(addrs)) {
+			ret = PTR_ERR(addrs);
+			goto error_2;
+		}
+
+		if (vllist->nr_servers >= nr_servers) {
+			_debug("skip %u >= %u", vllist->nr_servers, nr_servers);
+			afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
+			afs_put_vlserver(cell->net, server);
+			continue;
+		}
+
+		addrs->source = bs.source;
+		addrs->status = bs.status;
+
+		if (addrs->nr_addrs == 0) {
+			afs_put_addrlist(addrs, afs_alist_trace_put_parse_empty);
+			if (!rcu_access_pointer(server->addresses)) {
+				afs_put_vlserver(cell->net, server);
+				continue;
+			}
+		} else {
+			struct afs_addr_list *old = addrs;
+
+			write_lock(&server->lock);
+			old = rcu_replace_pointer(server->addresses, old,
+						  lockdep_is_held(&server->lock));
+			write_unlock(&server->lock);
+			afs_put_addrlist(old, afs_alist_trace_put_vlserver_old);
+		}
+
+
+		/* TODO: Might want to check for duplicates */
+
+		/* Insertion-sort by priority and weight */
+		for (j = 0; j < vllist->nr_servers; j++) {
+			if (bs.priority < vllist->servers[j].priority)
+				break; /* Lower preferable */
+			if (bs.priority == vllist->servers[j].priority &&
+			    bs.weight > vllist->servers[j].weight)
+				break; /* Higher preferable */
+		}
+
+		if (j < vllist->nr_servers) {
+			memmove(vllist->servers + j + 1,
+				vllist->servers + j,
+				(vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
+		}
+
+		clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+
+		vllist->servers[j].priority = bs.priority;
+		vllist->servers[j].weight = bs.weight;
+		vllist->servers[j].server = server;
+		vllist->nr_servers++;
+	}
+
+	if (b != end) {
+		_debug("parse error %zd", b - end);
+		goto error;
+	}
+
+	afs_put_vlserverlist(cell->net, previous);
+	_leave(" = ok [%u]", vllist->nr_servers);
+	return vllist;
+
+error_2:
+	afs_put_vlserver(cell->net, server);
+error:
+	afs_put_vlserverlist(cell->net, vllist);
+	afs_put_vlserverlist(cell->net, previous);
+dump:
+	if (ret != -ENOMEM) {
+		printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
+		print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
+	}
+	return ERR_PTR(ret);
+}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644
index 000000000000..3d2e0c925460
--- /dev/null
+++ b/fs/afs/vl_probe.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AFS vlserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_vl_probe(struct afs_vlserver *server)
+{
+	if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
+		server->rtt = UINT_MAX;
+		clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+	}
+
+	clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
+	wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
+}
+
+/*
+ * Handle the completion of a probe RPC call.
+ */
+static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up)
+{
+	if (atomic_dec_and_test(&server->probe_outstanding)) {
+		afs_finished_vl_probe(server);
+		wake_up = true;
+	}
+
+	if (wake_up)
+		wake_up_all(&server->probe_wq);
+}
+
+/*
+ * Process the result of probing a vlserver.  This is called after successful
+ * or failed delivery of an VL.GetCapabilities operation.
+ */
+void afs_vlserver_probe_result(struct afs_call *call)
+{
+	struct afs_addr_list *alist = call->vl_probe;
+	struct afs_vlserver *server = call->vlserver;
+	struct afs_address *addr = &alist->addrs[call->probe_index];
+	unsigned int server_index = call->server_index;
+	unsigned int rtt_us = 0;
+	unsigned int index = call->probe_index;
+	bool have_result = false;
+	int ret = call->error;
+
+	_enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
+
+	spin_lock(&server->probe_lock);
+
+	switch (ret) {
+	case 0:
+		server->probe.error = 0;
+		goto responded;
+	case -ECONNABORTED:
+		if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) {
+			server->probe.abort_code = call->abort_code;
+			server->probe.error = ret;
+		}
+		goto responded;
+	case -ENOMEM:
+	case -ENONET:
+	case -EKEYEXPIRED:
+	case -EKEYREVOKED:
+	case -EKEYREJECTED:
+		server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE;
+		if (server->probe.error == 0)
+			server->probe.error = ret;
+		trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
+		goto out;
+	case -ECONNRESET: /* Responded, but call expired. */
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+	default:
+		clear_bit(index, &alist->responded);
+		set_bit(index, &alist->probe_failed);
+		if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) &&
+		    (server->probe.error == 0 ||
+		     server->probe.error == -ETIMEDOUT ||
+		     server->probe.error == -ETIME))
+			server->probe.error = ret;
+		trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail);
+		goto out;
+	}
+
+responded:
+	set_bit(index, &alist->responded);
+	clear_bit(index, &alist->probe_failed);
+
+	if (call->service_id == YFS_VL_SERVICE) {
+		server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS;
+		set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+		server->service_id = call->service_id;
+	} else {
+		server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS;
+		if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) {
+			clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+			server->service_id = call->service_id;
+		}
+	}
+
+	rtt_us = rxrpc_kernel_get_srtt(addr->peer);
+	if (rtt_us < server->probe.rtt) {
+		server->probe.rtt = rtt_us;
+		server->rtt = rtt_us;
+		alist->preferred = index;
+	}
+
+	smp_wmb(); /* Set rtt before responded. */
+	server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED;
+	set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+	set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags);
+	have_result = true;
+out:
+	spin_unlock(&server->probe_lock);
+
+	trace_afs_vl_probe(server, false, alist, index, call->error, call->abort_code, rtt_us);
+	_debug("probe [%u][%u] %pISpc rtt=%d ret=%d",
+	       server_index, index, rxrpc_kernel_remote_addr(addr->peer),
+	       rtt_us, ret);
+
+	afs_done_one_vl_probe(server, have_result);
+}
+
+/*
+ * Probe all of a vlserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static bool afs_do_probe_vlserver(struct afs_net *net,
+				  struct afs_vlserver *server,
+				  struct key *key,
+				  unsigned int server_index,
+				  struct afs_error *_e)
+{
+	struct afs_addr_list *alist;
+	struct afs_call *call;
+	unsigned long unprobed;
+	unsigned int index, i;
+	bool in_progress = false;
+	int best_prio;
+
+	_enter("%s", server->name);
+
+	read_lock(&server->lock);
+	alist = rcu_dereference_protected(server->addresses,
+					  lockdep_is_held(&server->lock));
+	afs_get_addrlist(alist, afs_alist_trace_get_vlprobe);
+	read_unlock(&server->lock);
+
+	atomic_set(&server->probe_outstanding, alist->nr_addrs);
+	memset(&server->probe, 0, sizeof(server->probe));
+	server->probe.rtt = UINT_MAX;
+
+	unprobed = (1UL << alist->nr_addrs) - 1;
+	while (unprobed) {
+		best_prio = -1;
+		index = 0;
+		for (i = 0; i < alist->nr_addrs; i++) {
+			if (test_bit(i, &unprobed) &&
+			    alist->addrs[i].prio > best_prio) {
+				index = i;
+				best_prio = alist->addrs[i].prio;
+			}
+		}
+		__clear_bit(index, &unprobed);
+
+		trace_afs_vl_probe(server, true, alist, index, 0, 0, 0);
+		call = afs_vl_get_capabilities(net, alist, index, key, server,
+					       server_index);
+		if (!IS_ERR(call)) {
+			afs_prioritise_error(_e, call->error, call->abort_code);
+			afs_put_call(call);
+			in_progress = true;
+		} else {
+			afs_prioritise_error(_e, PTR_ERR(call), 0);
+			afs_done_one_vl_probe(server, false);
+		}
+	}
+
+	afs_put_addrlist(alist, afs_alist_trace_put_vlprobe);
+	return in_progress;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_send_vl_probes(struct afs_net *net, struct key *key,
+		       struct afs_vlserver_list *vllist)
+{
+	struct afs_vlserver *server;
+	struct afs_error e = {};
+	bool in_progress = false;
+	int i;
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		server = vllist->servers[i].server;
+		if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
+			continue;
+
+		if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
+		    afs_do_probe_vlserver(net, server, key, i, &e))
+			in_progress = true;
+	}
+
+	return in_progress ? 0 : e.error;
+}
+
+/*
+ * Wait for the first as-yet untried server to respond.
+ */
+int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
+			   unsigned long untried)
+{
+	struct wait_queue_entry *waits;
+	struct afs_vlserver *server;
+	unsigned int rtt = UINT_MAX, rtt_s;
+	bool have_responders = false;
+	int pref = -1, i;
+
+	_enter("%u,%lx", vllist->nr_servers, untried);
+
+	/* Only wait for servers that have a probe outstanding. */
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+				__clear_bit(i, &untried);
+			if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
+				have_responders = true;
+		}
+	}
+	if (have_responders || !untried)
+		return 0;
+
+	waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+	if (!waits)
+		return -ENOMEM;
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			init_waitqueue_entry(&waits[i], current);
+			add_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	for (;;) {
+		bool still_probing = false;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		for (i = 0; i < vllist->nr_servers; i++) {
+			if (test_bit(i, &untried)) {
+				server = vllist->servers[i].server;
+				if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)
+					goto stop;
+				if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+					still_probing = true;
+			}
+		}
+
+		if (!still_probing || signal_pending(current))
+			goto stop;
+		schedule();
+	}
+
+stop:
+	set_current_state(TASK_RUNNING);
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			rtt_s = READ_ONCE(server->rtt);
+			if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) &&
+			    rtt_s < rtt) {
+				pref = i;
+				rtt = rtt_s;
+			}
+
+			remove_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	kfree(waits);
+
+	if (pref == -1 && signal_pending(current))
+		return -ERESTARTSYS;
+
+	if (pref >= 0)
+		vllist->preferred = pref;
+
+	_leave(" = 0 [%u]", pref);
+	return 0;
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644
index 000000000000..6ad9688d8f4b
--- /dev/null
+++ b/fs/afs/vl_rotate.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Handle vlserver selection and rotation.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_vl.h"
+
+/*
+ * Begin an operation on a volume location server.
+ */
+bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
+				  struct key *key)
+{
+	static atomic_t debug_ids;
+
+	memset(vc, 0, sizeof(*vc));
+	vc->cell = cell;
+	vc->key = key;
+	vc->cumul_error.error = -EDESTADDRREQ;
+	vc->nr_iterations = -1;
+
+	if (signal_pending(current)) {
+		vc->cumul_error.error = -EINTR;
+		vc->flags |= AFS_VL_CURSOR_STOP;
+		return false;
+	}
+
+	vc->debug_id = atomic_inc_return(&debug_ids);
+	return true;
+}
+
+/*
+ * Begin iteration through a server list, starting with the last used server if
+ * possible, or the last recorded good server if not.
+ */
+static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
+{
+	struct afs_cell *cell = vc->cell;
+	unsigned int dns_lookup_count;
+
+	if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
+	    cell->dns_expiry <= ktime_get_real_seconds()) {
+		dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
+		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+		afs_queue_cell(cell, afs_cell_trace_queue_dns);
+
+		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+			if (wait_var_event_interruptible(
+				    &cell->dns_lookup_count,
+				    smp_load_acquire(&cell->dns_lookup_count)
+				    != dns_lookup_count) < 0) {
+				vc->cumul_error.error = -ERESTARTSYS;
+				return false;
+			}
+		}
+
+		/* Status load is ordered after lookup counter load */
+		if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+			pr_warn("No record of cell %s\n", cell->name);
+			vc->cumul_error.error = -ENOENT;
+			return false;
+		}
+
+		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+			vc->cumul_error.error = -EDESTADDRREQ;
+			return false;
+		}
+	}
+
+	read_lock(&cell->vl_servers_lock);
+	vc->server_list = afs_get_vlserverlist(
+		rcu_dereference_protected(cell->vl_servers,
+					  lockdep_is_held(&cell->vl_servers_lock)));
+	read_unlock(&cell->vl_servers_lock);
+	if (!vc->server_list->nr_servers)
+		return false;
+
+	vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
+	vc->server_index = -1;
+	return true;
+}
+
+/*
+ * Select the vlserver to use.  May be called multiple times to rotate
+ * through the vlservers.
+ */
+bool afs_select_vlserver(struct afs_vl_cursor *vc)
+{
+	struct afs_addr_list *alist = vc->alist;
+	struct afs_vlserver *vlserver;
+	unsigned long set, failed;
+	unsigned int rtt;
+	s32 abort_code = vc->call_abort_code;
+	int error = vc->call_error, i;
+
+	vc->nr_iterations++;
+
+	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
+	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
+	       vc->addr_index, vc->addr_tried,
+	       error, abort_code);
+
+	if (vc->flags & AFS_VL_CURSOR_STOP) {
+		_leave(" = f [stopped]");
+		return false;
+	}
+
+	if (vc->nr_iterations == 0)
+		goto start;
+
+	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);
+
+	/* Evaluate the result of the previous operation, if there was one. */
+	switch (error) {
+	default:
+	case 0:
+		/* Success or local failure.  Stop. */
+		vc->cumul_error.error = error;
+		vc->flags |= AFS_VL_CURSOR_STOP;
+		_leave(" = f [okay/local %d]", vc->cumul_error.error);
+		return false;
+
+	case -ECONNABORTED:
+		/* The far side rejected the operation on some grounds.  This
+		 * might involve the server being busy or the volume having been moved.
+		 */
+		switch (abort_code) {
+		case AFSVL_IO:
+		case AFSVL_BADVOLOPER:
+		case AFSVL_NOMEM:
+			/* The server went weird. */
+			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
+			//write_lock(&vc->cell->vl_servers_lock);
+			//vc->server_list->weird_mask |= 1 << vc->server_index;
+			//write_unlock(&vc->cell->vl_servers_lock);
+			goto next_server;
+
+		default:
+			afs_prioritise_error(&vc->cumul_error, error, abort_code);
+			goto failed;
+		}
+
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+		_debug("no conn %d", error);
+		afs_prioritise_error(&vc->cumul_error, error, 0);
+		goto iterate_address;
+
+	case -ECONNRESET:
+		_debug("call reset");
+		afs_prioritise_error(&vc->cumul_error, error, 0);
+		vc->flags |= AFS_VL_CURSOR_RETRY;
+		goto next_server;
+
+	case -EOPNOTSUPP:
+		_debug("notsupp");
+		goto next_server;
+	}
+
+restart_from_beginning:
+	_debug("restart");
+	if (vc->call_responded &&
+	    vc->addr_index != vc->alist->preferred &&
+	    test_bit(alist->preferred, &vc->addr_tried))
+		WRITE_ONCE(alist->preferred, vc->addr_index);
+	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
+	alist = vc->alist = NULL;
+
+	afs_put_vlserverlist(vc->cell->net, vc->server_list);
+	vc->server_list = NULL;
+	if (vc->flags & AFS_VL_CURSOR_RETRIED)
+		goto failed;
+	vc->flags |= AFS_VL_CURSOR_RETRIED;
+start:
+	_debug("start");
+	ASSERTCMP(alist, ==, NULL);
+
+	if (!afs_start_vl_iteration(vc))
+		goto failed;
+
+	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
+	if (error < 0) {
+		afs_prioritise_error(&vc->cumul_error, error, 0);
+		goto failed;
+	}
+
+pick_server:
+	_debug("pick [%lx]", vc->untried_servers);
+	ASSERTCMP(alist, ==, NULL);
+
+	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
+	if (error < 0) {
+		afs_prioritise_error(&vc->cumul_error, error, 0);
+		goto failed;
+	}
+
+	/* Pick the untried server with the lowest RTT. */
+	vc->server_index = vc->server_list->preferred;
+	if (test_bit(vc->server_index, &vc->untried_servers))
+		goto selected_server;
+
+	vc->server_index = -1;
+	rtt = UINT_MAX;
+	for (i = 0; i < vc->server_list->nr_servers; i++) {
+		struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+		if (!test_bit(i, &vc->untried_servers) ||
+		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
+			continue;
+		if (s->probe.rtt <= rtt) {
+			vc->server_index = i;
+			rtt = s->probe.rtt;
+		}
+	}
+
+	if (vc->server_index == -1)
+		goto no_more_servers;
+
+selected_server:
+	_debug("use %d", vc->server_index);
+	__clear_bit(vc->server_index, &vc->untried_servers);
+
+	/* We're starting on a different vlserver from the list.  We need to
+	 * check it, find its address list and probe its capabilities before we
+	 * use it.
+	 */
+	vlserver = vc->server_list->servers[vc->server_index].server;
+	vc->server = vlserver;
+
+	_debug("USING VLSERVER: %s", vlserver->name);
+
+	read_lock(&vlserver->lock);
+	alist = rcu_dereference_protected(vlserver->addresses,
+					  lockdep_is_held(&vlserver->lock));
+	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
+	read_unlock(&vlserver->lock);
+
+	vc->addr_tried = 0;
+	vc->addr_index = -1;
+
+iterate_address:
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	set = READ_ONCE(alist->responded);
+	failed = READ_ONCE(alist->probe_failed);
+	vc->addr_index = READ_ONCE(alist->preferred);
+
+	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);
+
+	set &= ~(failed | vc->addr_tried);
+
+	if (!set)
+		goto next_server;
+
+	if (!test_bit(vc->addr_index, &set))
+		vc->addr_index = __ffs(set);
+
+	set_bit(vc->addr_index, &vc->addr_tried);
+	vc->alist = alist;
+
+	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);
+
+	vc->call_responded = false;
+	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
+	return true;
+
+next_server:
+	_debug("next");
+	ASSERT(alist);
+	if (vc->call_responded &&
+	    vc->addr_index != alist->preferred &&
+	    test_bit(alist->preferred, &vc->addr_tried))
+		WRITE_ONCE(alist->preferred, vc->addr_index);
+	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
+	alist = vc->alist = NULL;
+	goto pick_server;
+
+no_more_servers:
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (vc->flags & AFS_VL_CURSOR_RETRY)
+		goto restart_from_beginning;
+
+	for (i = 0; i < vc->server_list->nr_servers; i++) {
+		struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
+			vc->cumul_error.responded = true;
+		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
+				     s->probe.abort_code);
+	}
+
+failed:
+	if (alist) {
+		if (vc->call_responded &&
+		    vc->addr_index != alist->preferred &&
+		    test_bit(alist->preferred, &vc->addr_tried))
+			WRITE_ONCE(alist->preferred, vc->addr_index);
+		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
+		alist = vc->alist = NULL;
+	}
+	vc->flags |= AFS_VL_CURSOR_STOP;
+	_leave(" = f [failed %d]", vc->cumul_error.error);
+	return false;
+}
+
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
+{
+	struct afs_cell *cell = vc->cell;
+	static int count;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+	pr_notice("DNS: src=%u st=%u lc=%x\n",
+		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
+	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
+		  vc->untried_servers, vc->server_index, vc->nr_iterations,
+		  vc->flags, vc->cumul_error.error);
+	pr_notice("VC: call  er=%d ac=%d r=%u\n",
+		  vc->call_error, vc->call_abort_code, vc->call_responded);
+
+	if (vc->server_list) {
+		const struct afs_vlserver_list *sl = vc->server_list;
+		pr_notice("VC: SL nr=%u ix=%u\n",
+			  sl->nr_servers, sl->index);
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_vlserver *s = sl->servers[i].server;
+			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
+				  s->name, s->port, s->flags, s->probe.error);
+			if (s->addresses) {
+				const struct afs_addr_list *a =
+					rcu_dereference(s->addresses);
+				pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
+					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+					  a->preferred);
+				pr_notice("VC:  - R=%lx F=%lx\n",
+					  a->responded, a->probe_failed);
+				if (a == vc->alist)
+					pr_notice("VC:  - current\n");
+			}
+		}
+	}
+
+	pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
+	rcu_read_unlock();
+}
+
+/*
+ * Tidy up a volume location server cursor and unlock the vnode.
+ */
+int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
+{
+	struct afs_net *net = vc->cell->net;
+
+	_enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
+
+	switch (vc->cumul_error.error) {
+	case -EDESTADDRREQ:
+	case -EADDRNOTAVAIL:
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+		afs_vl_dump_edestaddrreq(vc);
+		break;
+	}
+
+	if (vc->alist) {
+		if (vc->call_responded &&
+		    vc->addr_index != vc->alist->preferred &&
+		    test_bit(vc->alist->preferred, &vc->addr_tried))
+			WRITE_ONCE(vc->alist->preferred, vc->addr_index);
+		afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
+		vc->alist = NULL;
+	}
+	afs_put_vlserverlist(net, vc->server_list);
+	return vc->cumul_error.error;
+}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0cd182..3a23c0b08eb6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -1,219 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS Volume Location Service client
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include "afs_fs.h"
 #include "internal.h"
 
 /*
- * map volume locator abort codes to error codes
+ * Deliver reply data to a VL.GetEntryByNameU call.
  */
-static int afs_vl_abort_to_error(u32 abort_code)
+static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 {
-	_enter("%u", abort_code);
-
-	switch (abort_code) {
-	case AFSVL_IDEXIST:		return -EEXIST;
-	case AFSVL_IO:			return -EREMOTEIO;
-	case AFSVL_NAMEEXIST:		return -EEXIST;
-	case AFSVL_CREATEFAIL:		return -EREMOTEIO;
-	case AFSVL_NOENT:		return -ENOMEDIUM;
-	case AFSVL_EMPTY:		return -ENOMEDIUM;
-	case AFSVL_ENTDELETED:		return -ENOMEDIUM;
-	case AFSVL_BADNAME:		return -EINVAL;
-	case AFSVL_BADINDEX:		return -EINVAL;
-	case AFSVL_BADVOLTYPE:		return -EINVAL;
-	case AFSVL_BADSERVER:		return -EINVAL;
-	case AFSVL_BADPARTITION:	return -EINVAL;
-	case AFSVL_REPSFULL:		return -EFBIG;
-	case AFSVL_NOREPSERVER:		return -ENOENT;
-	case AFSVL_DUPREPSERVER:	return -EEXIST;
-	case AFSVL_RWNOTFOUND:		return -ENOENT;
-	case AFSVL_BADREFCOUNT:		return -EINVAL;
-	case AFSVL_SIZEEXCEEDED:	return -EINVAL;
-	case AFSVL_BADENTRY:		return -EINVAL;
-	case AFSVL_BADVOLIDBUMP:	return -EINVAL;
-	case AFSVL_IDALREADYHASHED:	return -EINVAL;
-	case AFSVL_ENTRYLOCKED:		return -EBUSY;
-	case AFSVL_BADVOLOPER:		return -EBADRQC;
-	case AFSVL_BADRELLOCKTYPE:	return -EINVAL;
-	case AFSVL_RERELEASE:		return -EREMOTEIO;
-	case AFSVL_BADSERVERFLAG:	return -EINVAL;
-	case AFSVL_PERM:		return -EACCES;
-	case AFSVL_NOMEM:		return -EREMOTEIO;
-	default:
-		return afs_abort_to_error(abort_code);
+	struct afs_uvldbentry__xdr *uvldb;
+	struct afs_vldb_entry *entry;
+	u32 nr_servers, vlflags;
+	int i, ret;
+
+	_enter("");
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	uvldb = call->buffer;
+	entry = call->ret_vldb;
+
+	nr_servers = ntohl(uvldb->nServers);
+	if (nr_servers > AFS_NMAXNSERVERS)
+		nr_servers = AFS_NMAXNSERVERS;
+
+	for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
+		entry->name[i] = (u8)ntohl(uvldb->name[i]);
+	entry->name[i] = 0;
+	entry->name_len = strlen(entry->name);
+
+	vlflags = ntohl(uvldb->flags);
+	for (i = 0; i < nr_servers; i++) {
+		struct afs_uuid__xdr *xdr;
+		struct afs_uuid *uuid;
+		u32 tmp = ntohl(uvldb->serverFlags[i]);
+		int j;
+		int n = entry->nr_servers;
+
+		if (tmp & AFS_VLSF_RWVOL) {
+			entry->fs_mask[n] |= AFS_VOL_VTM_RW;
+			if (vlflags & AFS_VLF_BACKEXISTS)
+				entry->fs_mask[n] |= AFS_VOL_VTM_BAK;
+		}
+		if (tmp & AFS_VLSF_ROVOL)
+			entry->fs_mask[n] |= AFS_VOL_VTM_RO;
+		if (!entry->fs_mask[n])
+			continue;
+
+		xdr = &uvldb->serverNumber[i];
+		uuid = (struct afs_uuid *)&entry->fs_server[n];
+		uuid->time_low			= xdr->time_low;
+		uuid->time_mid			= htons(ntohl(xdr->time_mid));
+		uuid->time_hi_and_version	= htons(ntohl(xdr->time_hi_and_version));
+		uuid->clock_seq_hi_and_reserved	= (u8)ntohl(xdr->clock_seq_hi_and_reserved);
+		uuid->clock_seq_low		= (u8)ntohl(xdr->clock_seq_low);
+		for (j = 0; j < 6; j++)
+			uuid->node[j] = (u8)ntohl(xdr->node[j]);
+
+		entry->vlsf_flags[n] = tmp;
+		entry->addr_version[n] = ntohl(uvldb->serverUnique[i]);
+		entry->nr_servers++;
 	}
+
+	for (i = 0; i < AFS_MAXTYPES; i++)
+		entry->vid[i] = ntohl(uvldb->volumeId[i]);
+
+	if (vlflags & AFS_VLF_RWEXISTS)
+		__set_bit(AFS_VLDB_HAS_RW, &entry->flags);
+	if (vlflags & AFS_VLF_ROEXISTS)
+		__set_bit(AFS_VLDB_HAS_RO, &entry->flags);
+	if (vlflags & AFS_VLF_BACKEXISTS)
+		__set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
+
+	if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+		entry->error = -ENOMEDIUM;
+		__set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
+	}
+
+	__set_bit(AFS_VLDB_QUERY_VALID, &entry->flags);
+	_leave(" = 0 [done]");
+	return 0;
 }
 
 /*
- * deliver reply data to a VL.GetEntryByXXX call
+ * VL.GetEntryByNameU operation type.
  */
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
-					   struct sk_buff *skb, bool last)
+static const struct afs_call_type afs_RXVLGetEntryByNameU = {
+	.name		= "VL.GetEntryByNameU",
+	.op		= afs_VL_GetEntryByNameU,
+	.deliver	= afs_deliver_vl_get_entry_by_name_u,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Dispatch a get volume entry by name or ID operation (uuid variant).  If the
+ * volname is a decimal number then it's a volume ID not a volume name.
+ */
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
+						  const char *volname,
+						  int volnamesz)
 {
-	struct afs_cache_vlocation *entry;
+	struct afs_vldb_entry *entry;
+	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
+	size_t reqsz, padsz;
 	__be32 *bp;
-	u32 tmp;
-	int loop;
 
-	_enter(",,%u", last);
+	_enter("");
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
+	padsz = (4 - (volnamesz & 3)) & 3;
+	reqsz = 8 + volnamesz + padsz;
 
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
 
-	/* unmarshall the reply once we've received all of it */
-	entry = call->reply;
-	bp = call->buffer;
+	call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz,
+				   sizeof(struct afs_uvldbentry__xdr));
+	if (!call) {
+		kfree(entry);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	for (loop = 0; loop < 64; loop++)
-		entry->name[loop] = ntohl(*bp++);
-	entry->name[loop] = 0;
-	bp++; /* final NUL */
+	call->key = vc->key;
+	call->ret_vldb = entry;
+	call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+	call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+	call->service_id = vc->server->service_id;
 
-	bp++; /* type */
-	entry->nservers = ntohl(*bp++);
+	/* Marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(VLGETENTRYBYNAMEU);
+	*bp++ = htonl(volnamesz);
+	memcpy(bp, volname, volnamesz);
+	if (padsz > 0)
+		memset((void *)bp + volnamesz, 0, padsz);
+
+	trace_afs_make_vl_call(call);
+	afs_make_call(call, GFP_KERNEL);
+	afs_wait_for_call_to_complete(call);
+	vc->call_abort_code	= call->abort_code;
+	vc->call_error		= call->error;
+	vc->call_responded	= call->responded;
+	afs_put_call(call);
+	if (vc->call_error) {
+		kfree(entry);
+		return ERR_PTR(vc->call_error);
+	}
+	return entry;
+}
 
-	for (loop = 0; loop < 8; loop++)
-		entry->servers[loop].s_addr = *bp++;
+/*
+ * Deliver reply data to a VL.GetAddrsU call.
+ *
+ *	GetAddrsU(IN ListAddrByAttributes *inaddr,
+ *		  OUT afsUUID *uuidp1,
+ *		  OUT uint32_t *uniquifier,
+ *		  OUT uint32_t *nentries,
+ *		  OUT bulkaddrs *blkaddrs);
+ */
+static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
+{
+	struct afs_addr_list *alist;
+	__be32 *bp;
+	u32 uniquifier, nentries, count;
+	int i, ret;
+
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->iter), call->count);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_buf(call,
+				   sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
+		call->unmarshall++;
+
+		/* Extract the returned uuid, uniquifier, nentries and
+		 * blkaddrs size */
+		fallthrough;
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer + sizeof(struct afs_uuid__xdr);
+		uniquifier	= ntohl(*bp++);
+		nentries	= ntohl(*bp++);
+		count		= ntohl(*bp);
+
+		nentries = min(nentries, count);
+		alist = afs_alloc_addrlist(nentries);
+		if (!alist)
+			return -ENOMEM;
+		alist->version = uniquifier;
+		call->ret_alist = alist;
+		call->count = count;
+		call->count2 = nentries;
+		call->unmarshall++;
+
+	more_entries:
+		count = min(call->count, 4U);
+		afs_extract_to_buf(call, count * sizeof(__be32));
+
+		fallthrough;	/* and extract entries */
+	case 2:
+		ret = afs_extract_data(call, call->count > 4);
+		if (ret < 0)
+			return ret;
+
+		alist = call->ret_alist;
+		bp = call->buffer;
+		count = min(call->count, 4U);
+		for (i = 0; i < count; i++) {
+			if (alist->nr_addrs < call->count2) {
+				ret = afs_merge_fs_addr4(call->net, alist, *bp++, AFS_FS_PORT);
+				if (ret < 0)
+					return ret;
+			}
+		}
+
+		call->count -= count;
+		if (call->count > 0)
+			goto more_entries;
+		call->unmarshall++;
+		break;
+	}
 
-	bp += 8; /* partition IDs */
+	_leave(" = 0 [done]");
+	return 0;
+}
 
-	for (loop = 0; loop < 8; loop++) {
-		tmp = ntohl(*bp++);
-		entry->srvtmask[loop] = 0;
-		if (tmp & AFS_VLSF_RWVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
-		if (tmp & AFS_VLSF_ROVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
-		if (tmp & AFS_VLSF_BACKVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
+/*
+ * VL.GetAddrsU operation type.
+ */
+static const struct afs_call_type afs_RXVLGetAddrsU = {
+	.name		= "VL.GetAddrsU",
+	.op		= afs_VL_GetAddrsU,
+	.deliver	= afs_deliver_vl_get_addrs_u,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
+					 const uuid_t *uuid)
+{
+	struct afs_ListAddrByAttributes__xdr *r;
+	struct afs_addr_list *alist;
+	const struct afs_uuid *u = (const struct afs_uuid *)uuid;
+	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
+	__be32 *bp;
+	int i;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU,
+				   sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr),
+				   sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
+	if (!call)
+		return ERR_PTR(-ENOMEM);
+
+	call->key = vc->key;
+	call->ret_alist = NULL;
+	call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+	call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+	call->service_id = vc->server->service_id;
+
+	/* Marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(VLGETADDRSU);
+	r = (struct afs_ListAddrByAttributes__xdr *)bp;
+	r->Mask		= htonl(AFS_VLADDR_UUID);
+	r->ipaddr	= 0;
+	r->index	= 0;
+	r->spare	= 0;
+	r->uuid.time_low			= u->time_low;
+	r->uuid.time_mid			= htonl(ntohs(u->time_mid));
+	r->uuid.time_hi_and_version		= htonl(ntohs(u->time_hi_and_version));
+	r->uuid.clock_seq_hi_and_reserved 	= htonl(u->clock_seq_hi_and_reserved);
+	r->uuid.clock_seq_low			= htonl(u->clock_seq_low);
+	for (i = 0; i < 6; i++)
+		r->uuid.node[i] = htonl(u->node[i]);
+
+	trace_afs_make_vl_call(call);
+	afs_make_call(call, GFP_KERNEL);
+	afs_wait_for_call_to_complete(call);
+	vc->call_abort_code	= call->abort_code;
+	vc->call_error		= call->error;
+	vc->call_responded	= call->responded;
+	alist			= call->ret_alist;
+	afs_put_call(call);
+	if (vc->call_error) {
+		afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+		return ERR_PTR(vc->call_error);
 	}
+	return alist;
+}
+
+/*
+ * Deliver reply data to an VL.GetCapabilities operation.
+ */
+static int afs_deliver_vl_get_capabilities(struct afs_call *call)
+{
+	u32 count;
+	int ret;
+
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->iter), call->count);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
 
-	entry->vid[0] = ntohl(*bp++);
-	entry->vid[1] = ntohl(*bp++);
-	entry->vid[2] = ntohl(*bp++);
+		fallthrough;	/* and extract the capabilities word count */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
-	bp++; /* clone ID */
+		count = ntohl(call->tmp);
+		call->count = count;
+		call->count2 = count;
 
-	tmp = ntohl(*bp++); /* flags */
-	entry->vidmask = 0;
-	if (tmp & AFS_VLF_RWEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RW;
-	if (tmp & AFS_VLF_ROEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RO;
-	if (tmp & AFS_VLF_BACKEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_BAK;
-	if (!entry->vidmask)
-		return -EBADMSG;
+		call->unmarshall++;
+		afs_extract_discard(call, count * sizeof(__be32));
+
+		fallthrough;	/* and extract capabilities words */
+	case 2:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		/* TODO: Examine capabilities */
+
+		call->unmarshall++;
+		break;
+	}
 
 	_leave(" = 0 [done]");
 	return 0;
 }
 
+static void afs_destroy_vl_get_capabilities(struct afs_call *call)
+{
+	afs_put_addrlist(call->vl_probe, afs_alist_trace_put_vlgetcaps);
+	afs_put_vlserver(call->net, call->vlserver);
+	afs_flat_call_destructor(call);
+}
+
 /*
- * VL.GetEntryByName operation type
+ * VL.GetCapabilities operation type
  */
-static const struct afs_call_type afs_RXVLGetEntryByName = {
-	.name		= "VL.GetEntryByName",
-	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.abort_to_error	= afs_vl_abort_to_error,
-	.destructor	= afs_flat_call_destructor,
+static const struct afs_call_type afs_RXVLGetCapabilities = {
+	.name		= "VL.GetCapabilities",
+	.op		= afs_VL_GetCapabilities,
+	.deliver	= afs_deliver_vl_get_capabilities,
+	.immediate_cancel = afs_vlserver_probe_result,
+	.done		= afs_vlserver_probe_result,
+	.destructor	= afs_destroy_vl_get_capabilities,
 };
 
 /*
- * VL.GetEntryById operation type
+ * Probe a volume server for the capabilities that it supports.  This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
+					 struct afs_addr_list *alist,
+					 unsigned int addr_index,
+					 struct key *key,
+					 struct afs_vlserver *server,
+					 unsigned int server_index)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
+	if (!call)
+		return ERR_PTR(-ENOMEM);
+
+	call->key = key;
+	call->vlserver = afs_get_vlserver(server);
+	call->server_index = server_index;
+	call->peer = rxrpc_kernel_get_peer(alist->addrs[addr_index].peer);
+	call->vl_probe = afs_get_addrlist(alist, afs_alist_trace_get_vlgetcaps);
+	call->probe_index = addr_index;
+	call->service_id = server->service_id;
+	call->upgrade = true;
+	call->async = true;
+	call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(VLGETCAPABILITIES);
+
+	/* Can't take a ref on server */
+	trace_afs_make_vl_call(call);
+	afs_make_call(call, GFP_KERNEL);
+	return call;
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetEndpoints call.
+ *
+ *	GetEndpoints(IN yfsServerAttributes *attr,
+ *		     OUT opr_uuid *uuid,
+ *		     OUT afs_int32 *uniquifier,
+ *		     OUT endpoints *fsEndpoints,
+ *		     OUT endpoints *volEndpoints)
+ */
+static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
+{
+	struct afs_addr_list *alist;
+	__be32 *bp;
+	u32 uniquifier, size;
+	int ret;
+
+	_enter("{%u,%zu,%u}",
+	       call->unmarshall, iov_iter_count(call->iter), call->count2);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
+		call->unmarshall = 1;
+
+		/* Extract the returned uuid, uniquifier, fsEndpoints count and
+		 * either the first fsEndpoint type or the volEndpoints
+		 * count if there are no fsEndpoints. */
+		fallthrough;
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer + sizeof(uuid_t);
+		uniquifier	= ntohl(*bp++);
+		call->count	= ntohl(*bp++);
+		call->count2	= ntohl(*bp); /* Type or next count */
+
+		if (call->count > YFS_MAXENDPOINTS)
+			return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num);
+
+		alist = afs_alloc_addrlist(call->count);
+		if (!alist)
+			return -ENOMEM;
+		alist->version = uniquifier;
+		call->ret_alist = alist;
+
+		if (call->count == 0)
+			goto extract_volendpoints;
+
+	next_fsendpoint:
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			size = sizeof(__be32) * (1 + 1 + 1);
+			break;
+		case YFS_ENDPOINT_IPV6:
+			size = sizeof(__be32) * (1 + 4 + 1);
+			break;
+		default:
+			return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type);
+		}
+
+		size += sizeof(__be32);
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 2;
+
+		fallthrough;	/* and extract fsEndpoints[] entries */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		alist = call->ret_alist;
+		bp = call->buffer;
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			if (ntohl(bp[0]) != sizeof(__be32) * 2)
+				return afs_protocol_error(
+					call, afs_eproto_yvl_fsendpt4_len);
+			ret = afs_merge_fs_addr4(call->net, alist, bp[1], ntohl(bp[2]));
+			if (ret < 0)
+				return ret;
+			bp += 3;
+			break;
+		case YFS_ENDPOINT_IPV6:
+			if (ntohl(bp[0]) != sizeof(__be32) * 5)
+				return afs_protocol_error(
+					call, afs_eproto_yvl_fsendpt6_len);
+			ret = afs_merge_fs_addr6(call->net, alist, bp + 1, ntohl(bp[5]));
+			if (ret < 0)
+				return ret;
+			bp += 6;
+			break;
+		default:
+			return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type);
+		}
+
+		/* Got either the type of the next entry or the count of
+		 * volEndpoints if no more fsEndpoints.
+		 */
+		call->count2 = ntohl(*bp++);
+
+		call->count--;
+		if (call->count > 0)
+			goto next_fsendpoint;
+
+	extract_volendpoints:
+		/* Extract the list of volEndpoints. */
+		call->count = call->count2;
+		if (!call->count)
+			goto end;
+		if (call->count > YFS_MAXENDPOINTS)
+			return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+
+		afs_extract_to_buf(call, 1 * sizeof(__be32));
+		call->unmarshall = 3;
+
+		/* Extract the type of volEndpoints[0].  Normally we would
+		 * extract the type of the next endpoint when we extract the
+		 * data of the current one, but this is the first...
+		 */
+		fallthrough;
+	case 3:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+
+	next_volendpoint:
+		call->count2 = ntohl(*bp++);
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			size = sizeof(__be32) * (1 + 1 + 1);
+			break;
+		case YFS_ENDPOINT_IPV6:
+			size = sizeof(__be32) * (1 + 4 + 1);
+			break;
+		default:
+			return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+		}
+
+		if (call->count > 1)
+			size += sizeof(__be32); /* Get next type too */
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 4;
+
+		fallthrough;	/* and extract volEndpoints[] entries */
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		switch (call->count2) {
+		case YFS_ENDPOINT_IPV4:
+			if (ntohl(bp[0]) != sizeof(__be32) * 2)
+				return afs_protocol_error(
+					call, afs_eproto_yvl_vlendpt4_len);
+			bp += 3;
+			break;
+		case YFS_ENDPOINT_IPV6:
+			if (ntohl(bp[0]) != sizeof(__be32) * 5)
+				return afs_protocol_error(
+					call, afs_eproto_yvl_vlendpt6_len);
+			bp += 6;
+			break;
+		default:
+			return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type);
+		}
+
+		/* Got either the type of the next entry or the count of
+		 * volEndpoints if no more fsEndpoints.
+		 */
+		call->count--;
+		if (call->count > 0)
+			goto next_volendpoint;
+
+	end:
+		afs_extract_discard(call, 0);
+		call->unmarshall = 5;
+
+		fallthrough;	/* Done */
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+		call->unmarshall = 6;
+		fallthrough;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFSVL.GetEndpoints operation type.
  */
-static const struct afs_call_type afs_RXVLGetEntryById = {
-	.name		= "VL.GetEntryById",
-	.deliver	= afs_deliver_vl_get_entry_by_xxx,
-	.abort_to_error	= afs_vl_abort_to_error,
+static const struct afs_call_type afs_YFSVLGetEndpoints = {
+	.name		= "YFSVL.GetEndpoints",
+	.op		= afs_YFSVL_GetEndpoints,
+	.deliver	= afs_deliver_yfsvl_get_endpoints,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
- * dispatch a get volume entry by name operation
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
  */
-int afs_vl_get_entry_by_name(struct in_addr *addr,
-			     struct key *key,
-			     const char *volname,
-			     struct afs_cache_vlocation *entry,
-			     const struct afs_wait_mode *wait_mode)
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
+					      const uuid_t *uuid)
 {
+	struct afs_addr_list *alist;
 	struct afs_call *call;
-	size_t volnamesz, reqsz, padsz;
+	struct afs_net *net = vc->cell->net;
 	__be32 *bp;
 
 	_enter("");
 
-	volnamesz = strlen(volname);
-	padsz = (4 - (volnamesz & 3)) & 3;
-	reqsz = 8 + volnamesz + padsz;
-
-	call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
+	call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
+				   sizeof(__be32) * 2 + sizeof(*uuid),
+				   sizeof(struct in6_addr) + sizeof(__be32) * 3);
 	if (!call)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	call->key = key;
-	call->reply = entry;
-	call->service_id = VL_SERVICE;
-	call->port = htons(AFS_VL_PORT);
+	call->key = vc->key;
+	call->ret_alist = NULL;
+	call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+	call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+	call->service_id = vc->server->service_id;
 
-	/* marshall the parameters */
+	/* Marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(VLGETENTRYBYNAME);
-	*bp++ = htonl(volnamesz);
-	memcpy(bp, volname, volnamesz);
-	if (padsz > 0)
-		memset((void *) bp + volnamesz, 0, padsz);
+	*bp++ = htonl(YVLGETENDPOINTS);
+	*bp++ = htonl(YFS_SERVER_UUID);
+	memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
+
+	trace_afs_make_vl_call(call);
+	afs_make_call(call, GFP_KERNEL);
+	afs_wait_for_call_to_complete(call);
+	vc->call_abort_code	= call->abort_code;
+	vc->call_error		= call->error;
+	vc->call_responded	= call->responded;
+	alist			= call->ret_alist;
+	afs_put_call(call);
+	if (vc->call_error) {
+		afs_put_addrlist(alist, afs_alist_trace_put_getaddru);
+		return ERR_PTR(vc->call_error);
+	}
+	return alist;
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetCellName operation.
+ */
+static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
+{
+	char *cell_name;
+	u32 namesz, paddedsz;
+	int ret;
+
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->iter), call->count);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		fallthrough;	/* and extract the cell name length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		namesz = ntohl(call->tmp);
+		if (namesz > YFS_VL_MAXCELLNAME)
+			return afs_protocol_error(call, afs_eproto_cellname_len);
+		paddedsz = (namesz + 3) & ~3;
+		call->count = namesz;
+		call->count2 = paddedsz - namesz;
+
+		cell_name = kmalloc(namesz + 1, GFP_KERNEL);
+		if (!cell_name)
+			return -ENOMEM;
+		cell_name[namesz] = 0;
+		call->ret_str = cell_name;
+
+		afs_extract_begin(call, cell_name, namesz);
+		call->unmarshall++;
+
+		fallthrough;	/* and extract cell name */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_discard(call, call->count2);
+		call->unmarshall++;
+
+		fallthrough;	/* and extract padding */
+	case 3:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		call->unmarshall++;
+		break;
+	}
 
-	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
+	_leave(" = 0 [done]");
+	return 0;
 }
 
 /*
- * dispatch a get volume entry by ID operation
- */
-int afs_vl_get_entry_by_id(struct in_addr *addr,
-			   struct key *key,
-			   afs_volid_t volid,
-			   afs_voltype_t voltype,
-			   struct afs_cache_vlocation *entry,
-			   const struct afs_wait_mode *wait_mode)
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_YFSVLGetCellName = {
+	.name		= "YFSVL.GetCellName",
+	.op		= afs_YFSVL_GetCellName,
+	.deliver	= afs_deliver_yfsvl_get_cell_name,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Probe a volume server for the capabilities that it supports.  This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc)
 {
 	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
 	__be32 *bp;
+	char *cellname;
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
+	call = afs_alloc_flat_call(net, &afs_YFSVLGetCellName, 1 * 4, 0);
 	if (!call)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	call->key = key;
-	call->reply = entry;
-	call->service_id = VL_SERVICE;
-	call->port = htons(AFS_VL_PORT);
+	call->key = vc->key;
+	call->ret_str = NULL;
+	call->max_lifespan = AFS_VL_MAX_LIFESPAN;
+	call->peer = rxrpc_kernel_get_peer(vc->alist->addrs[vc->addr_index].peer);
+	call->service_id = vc->server->service_id;
 
 	/* marshall the parameters */
 	bp = call->request;
-	*bp++ = htonl(VLGETENTRYBYID);
-	*bp++ = htonl(volid);
-	*bp   = htonl(voltype);
-
-	/* initiate the call */
-	return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
+	*bp++ = htonl(YVLGETCELLNAME);
+
+	/* Can't take a ref on server */
+	trace_afs_make_vl_call(call);
+	afs_make_call(call, GFP_KERNEL);
+	afs_wait_for_call_to_complete(call);
+	vc->call_abort_code	= call->abort_code;
+	vc->call_error		= call->error;
+	vc->call_responded	= call->responded;
+	cellname		= call->ret_str;
+	afs_put_call(call);
+	if (vc->call_error) {
+		kfree(cellname);
+		return ERR_PTR(vc->call_error);
+	}
+	return cellname;
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
deleted file mode 100644
index 57bcb1596530..000000000000
--- a/fs/afs/vlocation.c
+++ /dev/null
@@ -1,718 +0,0 @@
-/* AFS volume location management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
-static unsigned afs_vlocation_update_timeout = 10 * 60;
-
-static void afs_vlocation_reaper(struct work_struct *);
-static void afs_vlocation_updater(struct work_struct *);
-
-static LIST_HEAD(afs_vlocation_updates);
-static LIST_HEAD(afs_vlocation_graveyard);
-static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
-static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
-static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
-static struct workqueue_struct *afs_vlocation_update_worker;
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
-					   struct key *key,
-					   struct afs_cache_vlocation *vldb)
-{
-	struct afs_cell *cell = vl->cell;
-	struct in_addr addr;
-	int count, ret;
-
-	_enter("%s,%s", cell->name, vl->vldb.name);
-
-	down_write(&vl->cell->vl_sem);
-	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		addr = cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-
-		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
-					       &afs_sync_call);
-		switch (ret) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
-				goto out;
-			goto rotate;
-		case -ENOMEDIUM:
-		case -EKEYREJECTED:
-		case -EKEYEXPIRED:
-			goto out;
-		default:
-			ret = -EIO;
-			goto rotate;
-		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
-	}
-
-out:
-	up_write(&vl->cell->vl_sem);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
-					 struct key *key,
-					 afs_volid_t volid,
-					 afs_voltype_t voltype,
-					 struct afs_cache_vlocation *vldb)
-{
-	struct afs_cell *cell = vl->cell;
-	struct in_addr addr;
-	int count, ret;
-
-	_enter("%s,%x,%d,", cell->name, volid, voltype);
-
-	down_write(&vl->cell->vl_sem);
-	ret = -ENOMEDIUM;
-	for (count = cell->vl_naddrs; count > 0; count--) {
-		addr = cell->vl_addrs[cell->vl_curr_svix];
-
-		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-
-		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
-					     &afs_sync_call);
-		switch (ret) {
-		case 0:
-			goto out;
-		case -ENOMEM:
-		case -ENONET:
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			if (ret == -ENOMEM || ret == -ENONET)
-				goto out;
-			goto rotate;
-		case -EBUSY:
-			vl->upd_busy_cnt++;
-			if (vl->upd_busy_cnt <= 3) {
-				if (vl->upd_busy_cnt > 1) {
-					/* second+ BUSY - sleep a little bit */
-					set_current_state(TASK_UNINTERRUPTIBLE);
-					schedule_timeout(1);
-					__set_current_state(TASK_RUNNING);
-				}
-				continue;
-			}
-			break;
-		case -ENOMEDIUM:
-			vl->upd_rej_cnt++;
-			goto rotate;
-		default:
-			ret = -EIO;
-			goto rotate;
-		}
-
-		/* rotate the server records upon lookup failure */
-	rotate:
-		cell->vl_curr_svix++;
-		cell->vl_curr_svix %= cell->vl_naddrs;
-		vl->upd_busy_cnt = 0;
-	}
-
-out:
-	if (ret < 0 && vl->upd_rej_cnt > 0) {
-		printk(KERN_NOTICE "kAFS:"
-		       " Active volume no longer valid '%s'\n",
-		       vl->vldb.name);
-		vl->valid = 0;
-		ret = -ENOMEDIUM;
-	}
-
-	up_write(&vl->cell->vl_sem);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * allocate a volume location record
- */
-static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
-						 const char *name,
-						 size_t namesz)
-{
-	struct afs_vlocation *vl;
-
-	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
-	if (vl) {
-		vl->cell = cell;
-		vl->state = AFS_VL_NEW;
-		atomic_set(&vl->usage, 1);
-		INIT_LIST_HEAD(&vl->link);
-		INIT_LIST_HEAD(&vl->grave);
-		INIT_LIST_HEAD(&vl->update);
-		init_waitqueue_head(&vl->waitq);
-		spin_lock_init(&vl->lock);
-		memcpy(vl->vldb.name, name, namesz);
-	}
-
-	_leave(" = %p", vl);
-	return vl;
-}
-
-/*
- * update record if we found it in the cache
- */
-static int afs_vlocation_update_record(struct afs_vlocation *vl,
-				       struct key *key,
-				       struct afs_cache_vlocation *vldb)
-{
-	afs_voltype_t voltype;
-	afs_volid_t vid;
-	int ret;
-
-	/* try to look up a cached volume in the cell VL databases by ID */
-	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       vl->vldb.name,
-	       vl->vldb.vidmask,
-	       ntohl(vl->vldb.servers[0].s_addr),
-	       vl->vldb.srvtmask[0],
-	       ntohl(vl->vldb.servers[1].s_addr),
-	       vl->vldb.srvtmask[1],
-	       ntohl(vl->vldb.servers[2].s_addr),
-	       vl->vldb.srvtmask[2]);
-
-	_debug("Vids: %08x %08x %08x",
-	       vl->vldb.vid[0],
-	       vl->vldb.vid[1],
-	       vl->vldb.vid[2]);
-
-	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
-		vid = vl->vldb.vid[0];
-		voltype = AFSVL_RWVOL;
-	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
-		vid = vl->vldb.vid[1];
-		voltype = AFSVL_ROVOL;
-	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
-		vid = vl->vldb.vid[2];
-		voltype = AFSVL_BACKVOL;
-	} else {
-		BUG();
-		vid = 0;
-		voltype = 0;
-	}
-
-	/* contact the server to make sure the volume is still available
-	 * - TODO: need to handle disconnected operation here
-	 */
-	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
-	switch (ret) {
-		/* net error */
-	default:
-		printk(KERN_WARNING "kAFS:"
-		       " failed to update volume '%s' (%x) up in '%s': %d\n",
-		       vl->vldb.name, vid, vl->cell->name, ret);
-		_leave(" = %d", ret);
-		return ret;
-
-		/* pulled from local cache into memory */
-	case 0:
-		_leave(" = 0");
-		return 0;
-
-		/* uh oh... looks like the volume got deleted */
-	case -ENOMEDIUM:
-		printk(KERN_ERR "kAFS:"
-		       " volume '%s' (%x) does not exist '%s'\n",
-		       vl->vldb.name, vid, vl->cell->name);
-
-		/* TODO: make existing record unavailable */
-		_leave(" = %d", ret);
-		return ret;
-	}
-}
-
-/*
- * apply the update to a VL record
- */
-static void afs_vlocation_apply_update(struct afs_vlocation *vl,
-				       struct afs_cache_vlocation *vldb)
-{
-	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       vldb->name, vldb->vidmask,
-	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
-	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
-	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
-
-	_debug("Vids: %08x %08x %08x",
-	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);
-
-	if (strcmp(vldb->name, vl->vldb.name) != 0)
-		printk(KERN_NOTICE "kAFS:"
-		       " name of volume '%s' changed to '%s' on server\n",
-		       vl->vldb.name, vldb->name);
-
-	vl->vldb = *vldb;
-
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_update_cookie(vl->cache);
-#endif
-}
-
-/*
- * fill in a volume location record, consulting the cache and the VL server
- * both
- */
-static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
-					struct key *key)
-{
-	struct afs_cache_vlocation vldb;
-	int ret;
-
-	_enter("");
-
-	ASSERTCMP(vl->valid, ==, 0);
-
-	memset(&vldb, 0, sizeof(vldb));
-
-	/* see if we have an in-cache copy (will set vl->valid if there is) */
-#ifdef CONFIG_AFS_FSCACHE
-	vl->cache = fscache_acquire_cookie(vl->cell->cache,
-					   &afs_vlocation_cache_index_def, vl);
-#endif
-
-	if (vl->valid) {
-		/* try to update a known volume in the cell VL databases by
-		 * ID as the name may have changed */
-		_debug("found in cache");
-		ret = afs_vlocation_update_record(vl, key, &vldb);
-	} else {
-		/* try to look up an unknown volume in the cell VL databases by
-		 * name */
-		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
-		if (ret < 0) {
-			printk("kAFS: failed to locate '%s' in cell '%s'\n",
-			       vl->vldb.name, vl->cell->name);
-			return ret;
-		}
-	}
-
-	afs_vlocation_apply_update(vl, &vldb);
-	_leave(" = 0");
-	return 0;
-}
-
-/*
- * queue a vlocation record for updates
- */
-static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
-{
-	struct afs_vlocation *xvl;
-
-	/* wait at least 10 minutes before updating... */
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
-
-	spin_lock(&afs_vlocation_updates_lock);
-
-	if (!list_empty(&afs_vlocation_updates)) {
-		/* ... but wait at least 1 second more than the newest record
-		 * already queued so that we don't spam the VL server suddenly
-		 * with lots of requests
-		 */
-		xvl = list_entry(afs_vlocation_updates.prev,
-				 struct afs_vlocation, update);
-		if (vl->update_at <= xvl->update_at)
-			vl->update_at = xvl->update_at + 1;
-	} else {
-		queue_delayed_work(afs_vlocation_update_worker,
-				   &afs_vlocation_update,
-				   afs_vlocation_update_timeout * HZ);
-	}
-
-	list_add_tail(&vl->update, &afs_vlocation_updates);
-	spin_unlock(&afs_vlocation_updates_lock);
-}
-
-/*
- * lookup volume location
- * - iterate through the VL servers in a cell until one of them admits knowing
- *   about the volume in question
- * - lookup in the local cache if not able to find on the VL server
- * - insert/update in the local cache if did get a VL response
- */
-struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
-					   struct key *key,
-					   const char *name,
-					   size_t namesz)
-{
-	struct afs_vlocation *vl;
-	int ret;
-
-	_enter("{%s},{%x},%*.*s,%zu",
-	       cell->name, key_serial(key),
-	       (int) namesz, (int) namesz, name, namesz);
-
-	if (namesz >= sizeof(vl->vldb.name)) {
-		_leave(" = -ENAMETOOLONG");
-		return ERR_PTR(-ENAMETOOLONG);
-	}
-
-	/* see if we have an in-memory copy first */
-	down_write(&cell->vl_sem);
-	spin_lock(&cell->vl_lock);
-	list_for_each_entry(vl, &cell->vl_list, link) {
-		if (vl->vldb.name[namesz] != '\0')
-			continue;
-		if (memcmp(vl->vldb.name, name, namesz) == 0)
-			goto found_in_memory;
-	}
-	spin_unlock(&cell->vl_lock);
-
-	/* not in the cell's in-memory lists - create a new record */
-	vl = afs_vlocation_alloc(cell, name, namesz);
-	if (!vl) {
-		up_write(&cell->vl_sem);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	afs_get_cell(cell);
-
-	list_add_tail(&vl->link, &cell->vl_list);
-	vl->state = AFS_VL_CREATING;
-	up_write(&cell->vl_sem);
-
-fill_in_record:
-	ret = afs_vlocation_fill_in_record(vl, key);
-	if (ret < 0)
-		goto error_abandon;
-	spin_lock(&vl->lock);
-	vl->state = AFS_VL_VALID;
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-
-	/* update volume entry in local cache */
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_update_cookie(vl->cache);
-#endif
-
-	/* schedule for regular updates */
-	afs_vlocation_queue_for_updates(vl);
-	goto success;
-
-found_in_memory:
-	/* found in memory */
-	_debug("found in memory");
-	atomic_inc(&vl->usage);
-	spin_unlock(&cell->vl_lock);
-	if (!list_empty(&vl->grave)) {
-		spin_lock(&afs_vlocation_graveyard_lock);
-		list_del_init(&vl->grave);
-		spin_unlock(&afs_vlocation_graveyard_lock);
-	}
-	up_write(&cell->vl_sem);
-
-	/* see if it was an abandoned record that we might try filling in */
-	spin_lock(&vl->lock);
-	while (vl->state != AFS_VL_VALID) {
-		afs_vlocation_state_t state = vl->state;
-
-		_debug("invalid [state %d]", state);
-
-		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
-			vl->state = AFS_VL_CREATING;
-			spin_unlock(&vl->lock);
-			goto fill_in_record;
-		}
-
-		/* must now wait for creation or update by someone else to
-		 * complete */
-		_debug("wait");
-
-		spin_unlock(&vl->lock);
-		ret = wait_event_interruptible(vl->waitq,
-					       vl->state == AFS_VL_NEW ||
-					       vl->state == AFS_VL_VALID ||
-					       vl->state == AFS_VL_NO_VOLUME);
-		if (ret < 0)
-			goto error;
-		spin_lock(&vl->lock);
-	}
-	spin_unlock(&vl->lock);
-
-success:
-	_leave(" = %p", vl);
-	return vl;
-
-error_abandon:
-	spin_lock(&vl->lock);
-	vl->state = AFS_VL_NEW;
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-error:
-	ASSERT(vl != NULL);
-	afs_put_vlocation(vl);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
-}
-
-/*
- * finish using a volume location record
- */
-void afs_put_vlocation(struct afs_vlocation *vl)
-{
-	if (!vl)
-		return;
-
-	_enter("%s", vl->vldb.name);
-
-	ASSERTCMP(atomic_read(&vl->usage), >, 0);
-
-	if (likely(!atomic_dec_and_test(&vl->usage))) {
-		_leave("");
-		return;
-	}
-
-	spin_lock(&afs_vlocation_graveyard_lock);
-	if (atomic_read(&vl->usage) == 0) {
-		_debug("buried");
-		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
-		vl->time_of_death = get_seconds();
-		queue_delayed_work(afs_wq, &afs_vlocation_reap,
-				   afs_vlocation_timeout * HZ);
-
-		/* suspend updates on this record */
-		if (!list_empty(&vl->update)) {
-			spin_lock(&afs_vlocation_updates_lock);
-			list_del_init(&vl->update);
-			spin_unlock(&afs_vlocation_updates_lock);
-		}
-	}
-	spin_unlock(&afs_vlocation_graveyard_lock);
-	_leave(" [killed?]");
-}
-
-/*
- * destroy a dead volume location record
- */
-static void afs_vlocation_destroy(struct afs_vlocation *vl)
-{
-	_enter("%p", vl);
-
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vl->cache, 0);
-#endif
-	afs_put_cell(vl->cell);
-	kfree(vl);
-}
-
-/*
- * reap dead volume location records
- */
-static void afs_vlocation_reaper(struct work_struct *work)
-{
-	LIST_HEAD(corpses);
-	struct afs_vlocation *vl;
-	unsigned long delay, expiry;
-	time_t now;
-
-	_enter("");
-
-	now = get_seconds();
-	spin_lock(&afs_vlocation_graveyard_lock);
-
-	while (!list_empty(&afs_vlocation_graveyard)) {
-		vl = list_entry(afs_vlocation_graveyard.next,
-				struct afs_vlocation, grave);
-
-		_debug("check %p", vl);
-
-		/* the queue is ordered most dead first */
-		expiry = vl->time_of_death + afs_vlocation_timeout;
-		if (expiry > now) {
-			delay = (expiry - now) * HZ;
-			_debug("delay %lu", delay);
-			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
-			break;
-		}
-
-		spin_lock(&vl->cell->vl_lock);
-		if (atomic_read(&vl->usage) > 0) {
-			_debug("no reap");
-			list_del_init(&vl->grave);
-		} else {
-			_debug("reap");
-			list_move_tail(&vl->grave, &corpses);
-			list_del_init(&vl->link);
-		}
-		spin_unlock(&vl->cell->vl_lock);
-	}
-
-	spin_unlock(&afs_vlocation_graveyard_lock);
-
-	/* now reap the corpses we've extracted */
-	while (!list_empty(&corpses)) {
-		vl = list_entry(corpses.next, struct afs_vlocation, grave);
-		list_del(&vl->grave);
-		afs_vlocation_destroy(vl);
-	}
-
-	_leave("");
-}
-
-/*
- * initialise the VL update process
- */
-int __init afs_vlocation_update_init(void)
-{
-	afs_vlocation_update_worker =
-		create_singlethread_workqueue("kafs_vlupdated");
-	return afs_vlocation_update_worker ? 0 : -ENOMEM;
-}
-
-/*
- * discard all the volume location records for rmmod
- */
-void afs_vlocation_purge(void)
-{
-	afs_vlocation_timeout = 0;
-
-	spin_lock(&afs_vlocation_updates_lock);
-	list_del_init(&afs_vlocation_updates);
-	spin_unlock(&afs_vlocation_updates_lock);
-	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
-	destroy_workqueue(afs_vlocation_update_worker);
-
-	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
-}
-
-/*
- * update a volume location
- */
-static void afs_vlocation_updater(struct work_struct *work)
-{
-	struct afs_cache_vlocation vldb;
-	struct afs_vlocation *vl, *xvl;
-	time_t now;
-	long timeout;
-	int ret;
-
-	_enter("");
-
-	now = get_seconds();
-
-	/* find a record to update */
-	spin_lock(&afs_vlocation_updates_lock);
-	for (;;) {
-		if (list_empty(&afs_vlocation_updates)) {
-			spin_unlock(&afs_vlocation_updates_lock);
-			_leave(" [nothing]");
-			return;
-		}
-
-		vl = list_entry(afs_vlocation_updates.next,
-				struct afs_vlocation, update);
-		if (atomic_read(&vl->usage) > 0)
-			break;
-		list_del_init(&vl->update);
-	}
-
-	timeout = vl->update_at - now;
-	if (timeout > 0) {
-		queue_delayed_work(afs_vlocation_update_worker,
-				   &afs_vlocation_update, timeout * HZ);
-		spin_unlock(&afs_vlocation_updates_lock);
-		_leave(" [nothing]");
-		return;
-	}
-
-	list_del_init(&vl->update);
-	atomic_inc(&vl->usage);
-	spin_unlock(&afs_vlocation_updates_lock);
-
-	/* we can now perform the update */
-	_debug("update %s", vl->vldb.name);
-	vl->state = AFS_VL_UPDATING;
-	vl->upd_rej_cnt = 0;
-	vl->upd_busy_cnt = 0;
-
-	ret = afs_vlocation_update_record(vl, NULL, &vldb);
-	spin_lock(&vl->lock);
-	switch (ret) {
-	case 0:
-		afs_vlocation_apply_update(vl, &vldb);
-		vl->state = AFS_VL_VALID;
-		break;
-	case -ENOMEDIUM:
-		vl->state = AFS_VL_VOLUME_DELETED;
-		break;
-	default:
-		vl->state = AFS_VL_UNCERTAIN;
-		break;
-	}
-	spin_unlock(&vl->lock);
-	wake_up(&vl->waitq);
-
-	/* and then reschedule */
-	_debug("reschedule");
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
-
-	spin_lock(&afs_vlocation_updates_lock);
-
-	if (!list_empty(&afs_vlocation_updates)) {
-		/* next update in 10 minutes, but wait at least 1 second more
-		 * than the newest record already queued so that we don't spam
-		 * the VL server suddenly with lots of requests
-		 */
-		xvl = list_entry(afs_vlocation_updates.prev,
-				 struct afs_vlocation, update);
-		if (vl->update_at <= xvl->update_at)
-			vl->update_at = xvl->update_at + 1;
-		xvl = list_entry(afs_vlocation_updates.next,
-				 struct afs_vlocation, update);
-		timeout = xvl->update_at - now;
-		if (timeout < 0)
-			timeout = 0;
-	} else {
-		timeout = afs_vlocation_update_timeout;
-	}
-
-	ASSERT(list_empty(&vl->update));
-
-	list_add_tail(&vl->update, &afs_vlocation_updates);
-
-	_debug("timeout %ld", timeout);
-	queue_delayed_work(afs_vlocation_update_worker,
-			   &afs_vlocation_update, timeout * HZ);
-	spin_unlock(&afs_vlocation_updates_lock);
-	afs_put_vlocation(vl);
-}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
deleted file mode 100644
index 25cf4c3f4ff7..000000000000
--- a/fs/afs/vnode.c
+++ /dev/null
@@ -1,1025 +0,0 @@
-/* AFS vnode management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-#if 0
-static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
-				   int depth, char lr)
-{
-	struct afs_vnode *vnode;
-	bool bad = false;
-
-	if (!node)
-		return false;
-
-	if (node->rb_left)
-		bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
-
-	vnode = rb_entry(node, struct afs_vnode, cb_promise);
-	_debug("%c %*.*s%c%p {%d}",
-	       rb_is_red(node) ? 'R' : 'B',
-	       depth, depth, "", lr,
-	       vnode, vnode->cb_expires_at);
-	if (rb_parent(node) != parent) {
-		printk("BAD: %p != %p\n", rb_parent(node), parent);
-		bad = true;
-	}
-
-	if (node->rb_right)
-		bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
-
-	return bad;
-}
-
-static noinline void dump_tree(const char *name, struct afs_server *server)
-{
-	_enter("%s", name);
-	if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
-		BUG();
-}
-#endif
-
-/*
- * insert a vnode into the backing server's vnode tree
- */
-static void afs_install_vnode(struct afs_vnode *vnode,
-			      struct afs_server *server)
-{
-	struct afs_server *old_server = vnode->server;
-	struct afs_vnode *xvnode;
-	struct rb_node *parent, **p;
-
-	_enter("%p,%p", vnode, server);
-
-	if (old_server) {
-		spin_lock(&old_server->fs_lock);
-		rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
-		spin_unlock(&old_server->fs_lock);
-	}
-
-	afs_get_server(server);
-	vnode->server = server;
-	afs_put_server(old_server);
-
-	/* insert into the server's vnode tree in FID order */
-	spin_lock(&server->fs_lock);
-
-	parent = NULL;
-	p = &server->fs_vnodes.rb_node;
-	while (*p) {
-		parent = *p;
-		xvnode = rb_entry(parent, struct afs_vnode, server_rb);
-		if (vnode->fid.vid < xvnode->fid.vid)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.vid > xvnode->fid.vid)
-			p = &(*p)->rb_right;
-		else if (vnode->fid.vnode < xvnode->fid.vnode)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.vnode > xvnode->fid.vnode)
-			p = &(*p)->rb_right;
-		else if (vnode->fid.unique < xvnode->fid.unique)
-			p = &(*p)->rb_left;
-		else if (vnode->fid.unique > xvnode->fid.unique)
-			p = &(*p)->rb_right;
-		else
-			BUG(); /* can't happen unless afs_iget() malfunctions */
-	}
-
-	rb_link_node(&vnode->server_rb, parent, p);
-	rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
-
-	spin_unlock(&server->fs_lock);
-	_leave("");
-}
-
-/*
- * insert a vnode into the promising server's update/expiration tree
- * - caller must hold vnode->lock
- */
-static void afs_vnode_note_promise(struct afs_vnode *vnode,
-				   struct afs_server *server)
-{
-	struct afs_server *old_server;
-	struct afs_vnode *xvnode;
-	struct rb_node *parent, **p;
-
-	_enter("%p,%p", vnode, server);
-
-	ASSERT(server != NULL);
-
-	old_server = vnode->server;
-	if (vnode->cb_promised) {
-		if (server == old_server &&
-		    vnode->cb_expires == vnode->cb_expires_at) {
-			_leave(" [no change]");
-			return;
-		}
-
-		spin_lock(&old_server->cb_lock);
-		if (vnode->cb_promised) {
-			_debug("delete");
-			rb_erase(&vnode->cb_promise, &old_server->cb_promises);
-			vnode->cb_promised = false;
-		}
-		spin_unlock(&old_server->cb_lock);
-	}
-
-	if (vnode->server != server)
-		afs_install_vnode(vnode, server);
-
-	vnode->cb_expires_at = vnode->cb_expires;
-	_debug("PROMISE on %p {%lu}",
-	       vnode, (unsigned long) vnode->cb_expires_at);
-
-	/* abuse an RB-tree to hold the expiration order (we may have multiple
-	 * items with the same expiration time) */
-	spin_lock(&server->cb_lock);
-
-	parent = NULL;
-	p = &server->cb_promises.rb_node;
-	while (*p) {
-		parent = *p;
-		xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
-		if (vnode->cb_expires_at < xvnode->cb_expires_at)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&vnode->cb_promise, parent, p);
-	rb_insert_color(&vnode->cb_promise, &server->cb_promises);
-	vnode->cb_promised = true;
-
-	spin_unlock(&server->cb_lock);
-	_leave("");
-}
-
-/*
- * handle remote file deletion by discarding the callback promise
- */
-static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
-{
-	struct afs_server *server;
-
-	_enter("{%p}", vnode->server);
-
-	set_bit(AFS_VNODE_DELETED, &vnode->flags);
-
-	server = vnode->server;
-	if (server) {
-		if (vnode->cb_promised) {
-			spin_lock(&server->cb_lock);
-			if (vnode->cb_promised) {
-				rb_erase(&vnode->cb_promise,
-					 &server->cb_promises);
-				vnode->cb_promised = false;
-			}
-			spin_unlock(&server->cb_lock);
-		}
-
-		spin_lock(&server->fs_lock);
-		rb_erase(&vnode->server_rb, &server->fs_vnodes);
-		spin_unlock(&server->fs_lock);
-
-		vnode->server = NULL;
-		afs_put_server(server);
-	} else {
-		ASSERT(!vnode->cb_promised);
-	}
-
-	_leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after a successful
- * operation completion
- * - starts callback expiry timer
- * - adds to server's callback list
- */
-void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
-				      struct afs_server *server)
-{
-	struct afs_server *oldserver = NULL;
-
-	_enter("%p,%p", vnode, server);
-
-	spin_lock(&vnode->lock);
-	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-	afs_vnode_note_promise(vnode, server);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-
-	wake_up_all(&vnode->update_waitq);
-	afs_put_server(oldserver);
-	_leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after an operation failed
- */
-static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
-{
-	_enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret);
-
-	spin_lock(&vnode->lock);
-
-	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-
-	if (ret == -ENOENT) {
-		/* the file was deleted on the server */
-		_debug("got NOENT from server - marking file deleted");
-		afs_vnode_deleted_remotely(vnode);
-	}
-
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-
-	wake_up_all(&vnode->update_waitq);
-	_leave("");
-}
-
-/*
- * fetch file status from the volume
- * - don't issue a fetch if:
- *   - the changed bit is not set and there's a valid callback
- *   - there are any outstanding ops that will fetch the status
- * - TODO implement local caching
- */
-int afs_vnode_fetch_status(struct afs_vnode *vnode,
-			   struct afs_vnode *auth_vnode, struct key *key)
-{
-	struct afs_server *server;
-	unsigned long acl_order;
-	int ret;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	_enter("%s,{%x:%u.%u}",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    vnode->cb_promised) {
-		_leave(" [unchanged]");
-		return 0;
-	}
-
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_leave(" [deleted]");
-		return -ENOENT;
-	}
-
-	acl_order = 0;
-	if (auth_vnode)
-		acl_order = auth_vnode->acl_order;
-
-	spin_lock(&vnode->lock);
-
-	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
-	    vnode->cb_promised) {
-		spin_unlock(&vnode->lock);
-		_leave(" [unchanged]");
-		return 0;
-	}
-
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-
-	if (vnode->update_cnt > 0) {
-		/* someone else started a fetch */
-		_debug("wait on fetch %d", vnode->update_cnt);
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		ASSERT(myself.func != NULL);
-		add_wait_queue(&vnode->update_waitq, &myself);
-
-		/* wait for the status to be updated */
-		for (;;) {
-			if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
-				break;
-			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-				break;
-
-			/* check to see if it got updated and invalidated all
-			 * before we saw it */
-			if (vnode->update_cnt == 0) {
-				remove_wait_queue(&vnode->update_waitq,
-						  &myself);
-				set_current_state(TASK_RUNNING);
-				goto get_anyway;
-			}
-
-			spin_unlock(&vnode->lock);
-
-			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
-
-			spin_lock(&vnode->lock);
-		}
-
-		remove_wait_queue(&vnode->update_waitq, &myself);
-		spin_unlock(&vnode->lock);
-		set_current_state(TASK_RUNNING);
-
-		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
-			-ENOENT : 0;
-	}
-
-get_anyway:
-	/* okay... we're going to have to initiate the op */
-	vnode->update_cnt++;
-
-	spin_unlock(&vnode->lock);
-
-	/* merge AFS status fetches and clear outstanding callback on this
-	 * vnode */
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %p{%08x}",
-		       server, ntohl(server->addr.s_addr));
-
-		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
-					       &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		_debug("adjust");
-		if (auth_vnode)
-			afs_cache_permit(vnode, key, acl_order);
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
-	} else {
-		_debug("failed [%d]", ret);
-		afs_vnode_status_update_failed(vnode, ret);
-	}
-
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * fetch file data from the volume
- * - TODO implement caching
- */
-int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
-			 off_t offset, size_t length, struct page *page)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	/* merge in AFS status fetches and clear outstanding callback on this
-	 * vnode */
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_fetch_data(server, key, vnode, offset, length,
-					page, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
-}
-
-/*
- * make a file or a directory
- */
-int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
-		     const char *name, umode_t mode, struct afs_fid *newfid,
-		     struct afs_file_status *newstatus,
-		     struct afs_callback *newcb, struct afs_server **_server)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,%s,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're creating in */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
-				    newstatus, newcb, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		*_server = server;
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-		*_server = NULL;
-	}
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * remove a file or directory
- */
-int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
-		     bool isdir)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,%s",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're removing from */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_remove(server, key, vnode, name, isdir,
-				    &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * create a hard link
- */
-int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
-			  struct key *key, const char *name)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
-	       dvnode->volume->vlocation->vldb.name,
-	       dvnode->fid.vid,
-	       dvnode->fid.vnode,
-	       dvnode->fid.unique,
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name);
-
-	/* this op will fetch the status on the directory we're removing from */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-	spin_lock(&dvnode->lock);
-	dvnode->update_cnt++;
-	spin_unlock(&dvnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(dvnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_link(server, key, dvnode, vnode, name,
-				  &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(dvnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_vnode_finalise_status_update(dvnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-		afs_vnode_status_update_failed(dvnode, ret);
-	}
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	spin_lock(&dvnode->lock);
-	dvnode->update_cnt--;
-	ASSERTCMP(dvnode->update_cnt, >=, 0);
-	spin_unlock(&dvnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * create a symbolic link
- */
-int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
-		      const char *name, const char *content,
-		      struct afs_fid *newfid,
-		      struct afs_file_status *newstatus,
-		      struct afs_server **_server)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,%s,%s,,,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key),
-	       name, content);
-
-	/* this op will fetch the status on the directory we're creating in */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_symlink(server, key, vnode, name, content,
-				     newfid, newstatus, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		*_server = server;
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-		*_server = NULL;
-	}
-
-	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * rename a file
- */
-int afs_vnode_rename(struct afs_vnode *orig_dvnode,
-		     struct afs_vnode *new_dvnode,
-		     struct key *key,
-		     const char *orig_name,
-		     const char *new_name)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
-	       orig_dvnode->volume->vlocation->vldb.name,
-	       orig_dvnode->fid.vid,
-	       orig_dvnode->fid.vnode,
-	       orig_dvnode->fid.unique,
-	       new_dvnode->volume->vlocation->vldb.name,
-	       new_dvnode->fid.vid,
-	       new_dvnode->fid.vnode,
-	       new_dvnode->fid.unique,
-	       key_serial(key),
-	       orig_name,
-	       new_name);
-
-	/* this op will fetch the status on both the directories we're dealing
-	 * with */
-	spin_lock(&orig_dvnode->lock);
-	orig_dvnode->update_cnt++;
-	spin_unlock(&orig_dvnode->lock);
-	if (new_dvnode != orig_dvnode) {
-		spin_lock(&new_dvnode->lock);
-		new_dvnode->update_cnt++;
-		spin_unlock(&new_dvnode->lock);
-	}
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(orig_dvnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
-				    new_dvnode, new_name, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(orig_dvnode, server);
-		if (new_dvnode != orig_dvnode)
-			afs_vnode_finalise_status_update(new_dvnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(orig_dvnode, ret);
-		if (new_dvnode != orig_dvnode)
-			afs_vnode_status_update_failed(new_dvnode, ret);
-	}
-
-	_leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
-	return ret;
-
-no_server:
-	spin_lock(&orig_dvnode->lock);
-	orig_dvnode->update_cnt--;
-	ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
-	spin_unlock(&orig_dvnode->lock);
-	if (new_dvnode != orig_dvnode) {
-		spin_lock(&new_dvnode->lock);
-		new_dvnode->update_cnt--;
-		ASSERTCMP(new_dvnode->update_cnt, >=, 0);
-		spin_unlock(&new_dvnode->lock);
-	}
-	_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
-	return PTR_ERR(server);
-}
-
-/*
- * write to a file
- */
-int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
-			 unsigned offset, unsigned to)
-{
-	struct afs_server *server;
-	struct afs_vnode *vnode = wb->vnode;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(wb->key),
-	       first, last, offset, to);
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_store_data(server, wb, first, last, offset, to,
-					&afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
-}
-
-/*
- * set the attributes on a file
- */
-int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
-		      struct iattr *attr)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	/* this op will fetch the status */
-	spin_lock(&vnode->lock);
-	vnode->update_cnt++;
-	spin_unlock(&vnode->lock);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0) {
-		afs_vnode_finalise_status_update(vnode, server);
-		afs_put_server(server);
-	} else {
-		afs_vnode_status_update_failed(vnode, ret);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	spin_lock(&vnode->lock);
-	vnode->update_cnt--;
-	ASSERTCMP(vnode->update_cnt, >=, 0);
-	spin_unlock(&vnode->lock);
-	return PTR_ERR(server);
-}
-
-/*
- * get the status of a volume
- */
-int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
-				struct afs_volume_status *vs)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_get_volume_status(server, key, vnode, vs, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
-}
-
-/*
- * get a lock on a file
- */
-int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
-		       afs_lock_type_t type)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x,%u",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key), type);
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_set_lock(server, key, vnode, type, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
-}
-
-/*
- * extend a lock on a file
- */
-int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_extend_lock(server, key, vnode, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
-}
-
-/*
- * release a lock on a file
- */
-int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s{%x:%u.%u},%x",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       key_serial(key));
-
-	do {
-		/* pick a server to query */
-		server = afs_volume_pick_fileserver(vnode);
-		if (IS_ERR(server))
-			goto no_server;
-
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
-		ret = afs_fs_release_lock(server, key, vnode, &afs_sync_call);
-
-	} while (!afs_volume_release_fileserver(vnode, server, ret));
-
-	/* adjust the flags */
-	if (ret == 0)
-		afs_put_server(server);
-
-	_leave(" = %d", ret);
-	return ret;
-
-no_server:
-	return PTR_ERR(server);
-}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 401eeb21869f..0efff3d25133 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -1,28 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* AFS volume management
  *
  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/sched.h>
 #include "internal.h"
 
-static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
+static unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static atomic_t afs_volume_debug_id;
+
+static void afs_destroy_volume(struct work_struct *work);
 
 /*
- * lookup a volume by name
- * - this can be one of the following:
+ * Insert a volume into a cell.  If there's an existing volume record, that is
+ * returned instead with a ref held.
+ */
+static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
+						      struct afs_volume *volume)
+{
+	struct afs_volume *p;
+	struct rb_node *parent = NULL, **pp;
+
+	write_seqlock(&cell->volume_lock);
+
+	pp = &cell->volumes.rb_node;
+	while (*pp) {
+		parent = *pp;
+		p = rb_entry(parent, struct afs_volume, cell_node);
+		if (p->vid < volume->vid) {
+			pp = &(*pp)->rb_left;
+		} else if (p->vid > volume->vid) {
+			pp = &(*pp)->rb_right;
+		} else {
+			if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+				volume = p;
+				goto found;
+			}
+
+			set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+			rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
+		}
+	}
+
+	rb_link_node_rcu(&volume->cell_node, parent, pp);
+	rb_insert_color(&volume->cell_node, &cell->volumes);
+	hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
+
+found:
+	write_sequnlock(&cell->volume_lock);
+	return volume;
+
+}
+
+static void afs_remove_volume_from_cell(struct afs_volume *volume)
+{
+	struct afs_cell *cell = volume->cell;
+
+	if (!hlist_unhashed(&volume->proc_link)) {
+		trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
+				 afs_volume_trace_remove);
+		write_seqlock(&cell->volume_lock);
+		hlist_del_rcu(&volume->proc_link);
+		if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+			rb_erase(&volume->cell_node, &cell->volumes);
+		write_sequnlock(&cell->volume_lock);
+	}
+}
+
+/*
+ * Allocate a volume record and load it up from a vldb record.
+ */
+static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
+					   struct afs_vldb_entry *vldb,
+					   struct afs_server_list **_slist)
+{
+	struct afs_server_list *slist;
+	struct afs_volume *volume;
+	int ret = -ENOMEM, i;
+
+	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+	if (!volume)
+		goto error_0;
+
+	volume->debug_id	= atomic_inc_return(&afs_volume_debug_id);
+	volume->vid		= vldb->vid[params->type];
+	volume->update_at	= ktime_get_real_seconds() + afs_volume_record_life;
+	volume->cell		= afs_get_cell(params->cell, afs_cell_trace_get_vol);
+	volume->type		= params->type;
+	volume->type_force	= params->force;
+	volume->name_len	= vldb->name_len;
+	volume->creation_time	= TIME64_MIN;
+	volume->update_time	= TIME64_MIN;
+
+	refcount_set(&volume->ref, 1);
+	INIT_HLIST_NODE(&volume->proc_link);
+	INIT_WORK(&volume->destructor, afs_destroy_volume);
+	rwlock_init(&volume->servers_lock);
+	mutex_init(&volume->volsync_lock);
+	mutex_init(&volume->cb_check_lock);
+	rwlock_init(&volume->cb_v_break_lock);
+	INIT_LIST_HEAD(&volume->open_mmaps);
+	init_rwsem(&volume->open_mmaps_lock);
+	memcpy(volume->name, vldb->name, vldb->name_len + 1);
+
+	for (i = 0; i < AFS_MAXTYPES; i++)
+		volume->vids[i] = vldb->vid[i];
+
+	slist = afs_alloc_server_list(volume, params->key, vldb);
+	if (IS_ERR(slist)) {
+		ret = PTR_ERR(slist);
+		goto error_1;
+	}
+
+	*_slist = slist;
+	rcu_assign_pointer(volume->servers, slist);
+	trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc);
+	return volume;
+
+error_1:
+	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
+	kfree(volume);
+error_0:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Look up or allocate a volume record.
+ */
+static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
+					    struct afs_vldb_entry *vldb)
+{
+	struct afs_server_list *slist;
+	struct afs_volume *candidate, *volume;
+
+	candidate = afs_alloc_volume(params, vldb, &slist);
+	if (IS_ERR(candidate))
+		return candidate;
+
+	volume = afs_insert_volume_into_cell(params->cell, candidate);
+	if (volume == candidate)
+		afs_attach_volume_to_servers(volume, slist);
+	else
+		afs_put_volume(candidate, afs_volume_trace_put_cell_dup);
+	return volume;
+}
+
+/*
+ * Look up a VLDB record for a volume.
+ */
+static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
+						 struct key *key,
+						 const char *volname,
+						 size_t volnamesz)
+{
+	struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
+	struct afs_vl_cursor vc;
+	int ret;
+
+	if (!afs_begin_vlserver_operation(&vc, cell, key))
+		return ERR_PTR(-ERESTARTSYS);
+
+	while (afs_select_vlserver(&vc)) {
+		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
+	}
+
+	ret = afs_end_vlserver_operation(&vc);
+	return ret < 0 ? ERR_PTR(ret) : vldb;
+}
+
+/*
+ * Look up a volume in the VL server and create a candidate volume record for
+ * it.
+ *
+ * The volume name can be one of the following:
  *	"%[cell:]volume[.]"		R/W volume
  *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
  *					 or R/W (rwparent=1) volume
@@ -42,360 +195,278 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  *           explicitly told otherwise
  */
-struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
+struct afs_volume *afs_create_volume(struct afs_fs_context *params)
 {
-	struct afs_vlocation *vlocation = NULL;
-	struct afs_volume *volume = NULL;
-	struct afs_server *server = NULL;
-	char srvtmask;
-	int ret, loop;
-
-	_enter("{%*.*s,%d}",
-	       params->volnamesz, params->volnamesz, params->volname, params->rwpath);
-
-	/* lookup the volume location record */
-	vlocation = afs_vlocation_lookup(params->cell, params->key,
-					 params->volname, params->volnamesz);
-	if (IS_ERR(vlocation)) {
-		ret = PTR_ERR(vlocation);
-		vlocation = NULL;
-		goto error;
-	}
+	struct afs_vldb_entry *vldb;
+	struct afs_volume *volume;
+	unsigned long type_mask = 1UL << params->type;
 
-	/* make the final decision on the type we want */
-	ret = -ENOMEDIUM;
-	if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
-		goto error;
+	vldb = afs_vl_lookup_vldb(params->cell, params->key,
+				  params->volname, params->volnamesz);
+	if (IS_ERR(vldb))
+		return ERR_CAST(vldb);
 
-	srvtmask = 0;
-	for (loop = 0; loop < vlocation->vldb.nservers; loop++)
-		srvtmask |= vlocation->vldb.srvtmask[loop];
+	if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
+		volume = ERR_PTR(vldb->error);
+		goto error;
+	}
 
+	/* Make the final decision on the type we want */
+	volume = ERR_PTR(-ENOMEDIUM);
 	if (params->force) {
-		if (!(srvtmask & (1 << params->type)))
+		if (!(vldb->flags & type_mask))
 			goto error;
-	} else if (srvtmask & AFS_VOL_VTM_RO) {
+	} else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
 		params->type = AFSVL_ROVOL;
-	} else if (srvtmask & AFS_VOL_VTM_RW) {
+	} else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
 		params->type = AFSVL_RWVOL;
 	} else {
 		goto error;
 	}
 
-	down_write(&params->cell->vl_sem);
+	volume = afs_lookup_volume(params, vldb);
 
-	/* is the volume already active? */
-	if (vlocation->vols[params->type]) {
-		/* yes - re-use it */
-		volume = vlocation->vols[params->type];
-		afs_get_volume(volume);
-		goto success;
-	}
-
-	/* create a new volume record */
-	_debug("creating new volume record");
-
-	ret = -ENOMEM;
-	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
-	if (!volume)
-		goto error_up;
+error:
+	kfree(vldb);
+	return volume;
+}
 
-	atomic_set(&volume->usage, 1);
-	volume->type		= params->type;
-	volume->type_force	= params->force;
-	volume->cell		= params->cell;
-	volume->vid		= vlocation->vldb.vid[params->type];
-
-	ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
-	if (ret)
-		goto error_bdi;
-
-	init_rwsem(&volume->server_sem);
-
-	/* look up all the applicable server records */
-	for (loop = 0; loop < 8; loop++) {
-		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
-			server = afs_lookup_server(
-			       volume->cell, &vlocation->vldb.servers[loop]);
-			if (IS_ERR(server)) {
-				ret = PTR_ERR(server);
-				goto error_discard;
-			}
+/*
+ * Destroy a volume record
+ */
+static void afs_destroy_volume(struct work_struct *work)
+{
+	struct afs_volume *volume = container_of(work, struct afs_volume, destructor);
+	struct afs_server_list *slist = rcu_access_pointer(volume->servers);
 
-			volume->servers[volume->nservers] = server;
-			volume->nservers++;
-		}
-	}
+	_enter("%p", volume);
 
-	/* attach the cache and volume location */
 #ifdef CONFIG_AFS_FSCACHE
-	volume->cache = fscache_acquire_cookie(vlocation->cache,
-					       &afs_volume_cache_index_def,
-					       volume);
+	ASSERTCMP(volume->cache, ==, NULL);
 #endif
-	afs_get_vlocation(vlocation);
-	volume->vlocation = vlocation;
 
-	vlocation->vols[volume->type] = volume;
+	afs_detach_volume_from_servers(volume, slist);
+	afs_remove_volume_from_cell(volume);
+	afs_put_serverlist(volume->cell->net, slist);
+	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
+	trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref),
+			 afs_volume_trace_free);
+	kfree_rcu(volume, rcu);
 
-success:
-	_debug("kAFS selected %s volume %08x",
-	       afs_voltypes[volume->type], volume->vid);
-	up_write(&params->cell->vl_sem);
-	afs_put_vlocation(vlocation);
-	_leave(" = %p", volume);
-	return volume;
-
-	/* clean up */
-error_up:
-	up_write(&params->cell->vl_sem);
-error:
-	afs_put_vlocation(vlocation);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
-
-error_discard:
-	bdi_destroy(&volume->bdi);
-error_bdi:
-	up_write(&params->cell->vl_sem);
-
-	for (loop = volume->nservers - 1; loop >= 0; loop--)
-		afs_put_server(volume->servers[loop]);
-
-	kfree(volume);
-	goto error;
+	_leave(" [destroyed]");
 }
 
 /*
- * destroy a volume record
+ * Try to get a reference on a volume record.
  */
-void afs_put_volume(struct afs_volume *volume)
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
 {
-	struct afs_vlocation *vlocation;
-	int loop;
-
-	if (!volume)
-		return;
+	int r;
 
-	_enter("%p", volume);
-
-	ASSERTCMP(atomic_read(&volume->usage), >, 0);
-
-	vlocation = volume->vlocation;
+	if (__refcount_inc_not_zero(&volume->ref, &r)) {
+		trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
+		return true;
+	}
+	return false;
+}
 
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	down_write(&vlocation->cell->vl_sem);
+/*
+ * Get a reference on a volume record.
+ */
+struct afs_volume *afs_get_volume(struct afs_volume *volume,
+				  enum afs_volume_trace reason)
+{
+	if (volume) {
+		int r;
 
-	if (likely(!atomic_dec_and_test(&volume->usage))) {
-		up_write(&vlocation->cell->vl_sem);
-		_leave("");
-		return;
+		__refcount_inc(&volume->ref, &r);
+		trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason);
 	}
+	return volume;
+}
 
-	vlocation->vols[volume->type] = NULL;
 
-	up_write(&vlocation->cell->vl_sem);
+/*
+ * Drop a reference on a volume record.
+ */
+void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+	if (volume) {
+		unsigned int debug_id = volume->debug_id;
+		afs_volid_t vid = volume->vid;
+		bool zero;
+		int r;
+
+		zero = __refcount_dec_and_test(&volume->ref, &r);
+		trace_afs_volume(debug_id, vid, r - 1, reason);
+		if (zero)
+			schedule_work(&volume->destructor);
+	}
+}
 
-	/* finish cleaning up the volume */
+/*
+ * Activate a volume.
+ */
+int afs_activate_volume(struct afs_volume *volume)
+{
 #ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(volume->cache, 0);
+	struct fscache_volume *vcookie;
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
+			 volume->cell->name, volume->vid);
+	if (!name)
+		return -ENOMEM;
+
+	vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
+	if (IS_ERR(vcookie)) {
+		if (vcookie != ERR_PTR(-EBUSY)) {
+			kfree(name);
+			return PTR_ERR(vcookie);
+		}
+		pr_err("AFS: Cache volume key already in use (%s)\n", name);
+		vcookie = NULL;
+	}
+	volume->cache = vcookie;
+	kfree(name);
 #endif
-	afs_put_vlocation(vlocation);
+	return 0;
+}
 
-	for (loop = volume->nservers - 1; loop >= 0; loop--)
-		afs_put_server(volume->servers[loop]);
+/*
+ * Deactivate a volume.
+ */
+void afs_deactivate_volume(struct afs_volume *volume)
+{
+	_enter("%s", volume->name);
 
-	bdi_destroy(&volume->bdi);
-	kfree(volume);
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_relinquish_volume(volume->cache, NULL,
+				  test_bit(AFS_VOLUME_DELETED, &volume->flags));
+	volume->cache = NULL;
+#endif
 
-	_leave(" [destroyed]");
+	_leave("");
 }
 
 /*
- * pick a server to use to try accessing this volume
- * - returns with an elevated usage count on the server chosen
+ * Query the VL service to update the volume status.
  */
-struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
+static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
 {
-	struct afs_volume *volume = vnode->volume;
-	struct afs_server *server;
-	int ret, state, loop;
+	struct afs_server_list *new, *old, *discard;
+	struct afs_vldb_entry *vldb;
+	char idbuf[24];
+	int ret, idsz;
+
+	_enter("");
 
-	_enter("%s", volume->vlocation->vldb.name);
+	/* We look up an ID by passing it as a decimal string in the
+	 * operation's name parameter.
+	 */
+	idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
 
-	/* stick with the server we're already using if we can */
-	if (vnode->server && vnode->server->fs_state == 0) {
-		afs_get_server(vnode->server);
-		_leave(" = %p [current]", vnode->server);
-		return vnode->server;
+	vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
+	if (IS_ERR(vldb)) {
+		ret = PTR_ERR(vldb);
+		goto error;
 	}
 
-	down_read(&volume->server_sem);
+	/* See if the volume got renamed. */
+	if (vldb->name_len != volume->name_len ||
+	    memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
+		/* TODO: Use RCU'd string. */
+		memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
+		volume->name_len = vldb->name_len;
+	}
 
-	/* handle the no-server case */
-	if (volume->nservers == 0) {
-		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
-		up_read(&volume->server_sem);
-		_leave(" = %d [no servers]", ret);
-		return ERR_PTR(ret);
+	/* See if the volume's server list got updated. */
+	new = afs_alloc_server_list(volume, key, vldb);
+	if (IS_ERR(new)) {
+		ret = PTR_ERR(new);
+		goto error_vldb;
 	}
 
-	/* basically, just search the list for the first live server and use
-	 * that */
-	ret = 0;
-	for (loop = 0; loop < volume->nservers; loop++) {
-		server = volume->servers[loop];
-		state = server->fs_state;
-
-		_debug("consider %d [%d]", loop, state);
-
-		switch (state) {
-			/* found an apparently healthy server */
-		case 0:
-			afs_get_server(server);
-			up_read(&volume->server_sem);
-			_leave(" = %p (picked %08x)",
-			       server, ntohl(server->addr.s_addr));
-			return server;
-
-		case -ENETUNREACH:
-			if (ret == 0)
-				ret = state;
-			break;
-
-		case -EHOSTUNREACH:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH)
-				ret = state;
-			break;
-
-		case -ECONNREFUSED:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH)
-				ret = state;
-			break;
-
-		default:
-		case -EREMOTEIO:
-			if (ret == 0 ||
-			    ret == -ENETUNREACH ||
-			    ret == -EHOSTUNREACH ||
-			    ret == -ECONNREFUSED)
-				ret = state;
-			break;
-		}
+	write_lock(&volume->servers_lock);
+
+	discard = new;
+	old = rcu_dereference_protected(volume->servers,
+					lockdep_is_held(&volume->servers_lock));
+	if (afs_annotate_server_list(new, old)) {
+		new->seq = volume->servers_seq + 1;
+		rcu_assign_pointer(volume->servers, new);
+		smp_wmb();
+		volume->servers_seq++;
+		discard = old;
 	}
 
-	/* no available servers
-	 * - TODO: handle the no active servers case better
-	 */
-	up_read(&volume->server_sem);
+	/* Check more often if replication is ongoing. */
+	if (new->ro_replicating)
+		volume->update_at = ktime_get_real_seconds() + 10 * 60;
+	else
+		volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+	write_unlock(&volume->servers_lock);
+
+	if (discard == old)
+		afs_reattach_volume_to_servers(volume, new, old);
+	afs_put_serverlist(volume->cell->net, discard);
+	ret = 0;
+error_vldb:
+	kfree(vldb);
+error:
 	_leave(" = %d", ret);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 /*
- * release a server after use
- * - releases the ref on the server struct that was acquired by picking
- * - records result of using a particular server to access a volume
- * - return 0 to try again, 1 if okay or to issue error
- * - the caller must release the server struct if result was 0
+ * Make sure the volume record is up to date.
  */
-int afs_volume_release_fileserver(struct afs_vnode *vnode,
-				  struct afs_server *server,
-				  int result)
+int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
 {
-	struct afs_volume *volume = vnode->volume;
-	unsigned loop;
-
-	_enter("%s,%08x,%d",
-	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
-	       result);
-
-	switch (result) {
-		/* success */
-	case 0:
-		server->fs_act_jif = jiffies;
-		server->fs_state = 0;
-		_leave("");
-		return 1;
-
-		/* the fileserver denied all knowledge of the volume */
-	case -ENOMEDIUM:
-		server->fs_act_jif = jiffies;
-		down_write(&volume->server_sem);
-
-		/* firstly, find where the server is in the active list (if it
-		 * is) */
-		for (loop = 0; loop < volume->nservers; loop++)
-			if (volume->servers[loop] == server)
-				goto present;
-
-		/* no longer there - may have been discarded by another op */
-		goto try_next_server_upw;
-
-	present:
-		volume->nservers--;
-		memmove(&volume->servers[loop],
-			&volume->servers[loop + 1],
-			sizeof(volume->servers[loop]) *
-			(volume->nservers - loop));
-		volume->servers[volume->nservers] = NULL;
-		afs_put_server(server);
-		volume->rjservers++;
-
-		if (volume->nservers > 0)
-			/* another server might acknowledge its existence */
-			goto try_next_server_upw;
-
-		/* handle the case where all the fileservers have rejected the
-		 * volume
-		 * - TODO: try asking the fileservers for volume information
-		 * - TODO: contact the VL server again to see if the volume is
-		 *         no longer registered
-		 */
-		up_write(&volume->server_sem);
-		afs_put_server(server);
-		_leave(" [completely rejected]");
-		return 1;
-
-		/* problem reaching the server */
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -ECONNREFUSED:
-	case -ETIME:
-	case -ETIMEDOUT:
-	case -EREMOTEIO:
-		/* mark the server as dead
-		 * TODO: vary dead timeout depending on error
-		 */
-		spin_lock(&server->fs_lock);
-		if (!server->fs_state) {
-			server->fs_dead_jif = jiffies + HZ * 10;
-			server->fs_state = result;
-			printk("kAFS: SERVER DEAD state=%d\n", result);
-		}
-		spin_unlock(&server->fs_lock);
-		goto try_next_server;
-
-		/* miscellaneous error */
-	default:
-		server->fs_act_jif = jiffies;
-	case -ENOMEM:
-	case -ENONET:
-		/* tell the caller to accept the result */
-		afs_put_server(server);
-		_leave(" [local failure]");
-		return 1;
-	}
+	int ret, retries = 0;
+
+	_enter("");
 
-	/* tell the caller to loop around and try the next server */
-try_next_server_upw:
-	up_write(&volume->server_sem);
-try_next_server:
-	afs_put_server(server);
-	_leave(" [try next server]");
+retry:
+	if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
+		goto wait;
+	if (volume->update_at <= ktime_get_real_seconds() ||
+	    test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
+		goto update;
+	_leave(" = 0");
 	return 0;
+
+update:
+	if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
+		clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+		ret = afs_update_volume_status(volume, op->key);
+		if (ret < 0)
+			set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+		clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
+		clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
+		wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+wait:
+	if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+		_leave(" = 0 [no wait]");
+		return 0;
+	}
+
+	ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
+			  (op->flags & AFS_OPERATION_UNINTR) ?
+			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
+	if (ret == -ERESTARTSYS) {
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	retries++;
+	if (retries == 4) {
+		_leave(" = -ESTALE");
+		return -ESTALE;
+	}
+	goto retry;
 }
diff --git a/fs/afs/write.c b/fs/afs/write.c
index a890db4b9898..93ad86ff3345 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -1,678 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /* handling of writes to regular files and writing back to the server
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
+
 #include <linux/backing-dev.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
+#include <linux/netfs.h>
+#include <trace/events/netfs.h>
 #include "internal.h"
 
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
-					   struct page *page);
-
-/*
- * mark a page as having been made dirty and thus needing writeback
- */
-int afs_set_page_dirty(struct page *page)
-{
-	_enter("");
-	return __set_page_dirty_nobuffers(page);
-}
-
-/*
- * unlink a writeback record because its usage has reached zero
- * - must be called with the wb->vnode->writeback_lock held
- */
-static void afs_unlink_writeback(struct afs_writeback *wb)
-{
-	struct afs_writeback *front;
-	struct afs_vnode *vnode = wb->vnode;
-
-	list_del_init(&wb->link);
-	if (!list_empty(&vnode->writebacks)) {
-		/* if an fsync rises to the front of the queue then wake it
-		 * up */
-		front = list_entry(vnode->writebacks.next,
-				   struct afs_writeback, link);
-		if (front->state == AFS_WBACK_SYNCING) {
-			_debug("wake up sync");
-			front->state = AFS_WBACK_COMPLETE;
-			wake_up(&front->waitq);
-		}
-	}
-}
-
-/*
- * free a writeback record
- */
-static void afs_free_writeback(struct afs_writeback *wb)
-{
-	_enter("");
-	key_put(wb->key);
-	kfree(wb);
-}
-
 /*
- * dispose of a reference to a writeback record
+ * completion of write to server
  */
-void afs_put_writeback(struct afs_writeback *wb)
+static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
-	struct afs_vnode *vnode = wb->vnode;
-
-	_enter("{%d}", wb->usage);
+	_enter("{%llx:%llu},{%x @%llx}",
+	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	spin_lock(&vnode->writeback_lock);
-	if (--wb->usage == 0)
-		afs_unlink_writeback(wb);
-	else
-		wb = NULL;
-	spin_unlock(&vnode->writeback_lock);
-	if (wb)
-		afs_free_writeback(wb);
+	afs_prune_wb_keys(vnode);
+	_leave("");
 }
 
 /*
- * partly or wholly fill a page that's under preparation for writing
+ * Find a key to use for the writeback.  We cached the keys used to author the
+ * writes on the vnode.  wreq->netfs_priv2 will contain the last writeback key
+ * record used or NULL and we need to start from there if it's set.
+ * wreq->netfs_priv will be set to the key itself or NULL.
  */
-static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-			 loff_t pos, struct page *page)
+static void afs_get_writeback_key(struct netfs_io_request *wreq)
 {
-	loff_t i_size;
-	int ret;
-	int len;
+	struct afs_wb_key *wbk, *old = wreq->netfs_priv2;
+	struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
 
-	_enter(",,%llu", (unsigned long long)pos);
+	key_put(wreq->netfs_priv);
+	wreq->netfs_priv = NULL;
+	wreq->netfs_priv2 = NULL;
 
-	i_size = i_size_read(&vnode->vfs_inode);
-	if (pos + PAGE_CACHE_SIZE > i_size)
-		len = i_size - pos;
+	spin_lock(&vnode->wb_lock);
+	if (old)
+		wbk = list_next_entry(old, vnode_link);
 	else
-		len = PAGE_CACHE_SIZE;
-
-	ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
-	if (ret < 0) {
-		if (ret == -ENOENT) {
-			_debug("got NOENT from server"
-			       " - marking file deleted and stale");
-			set_bit(AFS_VNODE_DELETED, &vnode->flags);
-			ret = -ESTALE;
-		}
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * prepare to perform part of a write to a page
- */
-int afs_write_begin(struct file *file, struct address_space *mapping,
-		    loff_t pos, unsigned len, unsigned flags,
-		    struct page **pagep, void **fsdata)
-{
-	struct afs_writeback *candidate, *wb;
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct page *page;
-	struct key *key = file->private_data;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-	unsigned to = from + len;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	int ret;
-
-	_enter("{%x:%u},{%lx},%u,%u",
-	       vnode->fid.vid, vnode->fid.vnode, index, from, to);
-
-	candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
-	if (!candidate)
-		return -ENOMEM;
-	candidate->vnode = vnode;
-	candidate->first = candidate->last = index;
-	candidate->offset_first = from;
-	candidate->to_last = to;
-	INIT_LIST_HEAD(&candidate->link);
-	candidate->usage = 1;
-	candidate->state = AFS_WBACK_PENDING;
-	init_waitqueue_head(&candidate->waitq);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
-		kfree(candidate);
-		return -ENOMEM;
-	}
-	*pagep = page;
-	/* page won't leak in error case: it eventually gets cleaned off LRU */
-
-	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
-		ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
-		if (ret < 0) {
-			kfree(candidate);
-			_leave(" = %d [prep]", ret);
-			return ret;
-		}
-		SetPageUptodate(page);
-	}
-
-try_again:
-	spin_lock(&vnode->writeback_lock);
-
-	/* see if this page is already pending a writeback under a suitable key
-	 * - if so we can just join onto that one */
-	wb = (struct afs_writeback *) page_private(page);
-	if (wb) {
-		if (wb->key == key && wb->state == AFS_WBACK_PENDING)
-			goto subsume_in_current_wb;
-		goto flush_conflicting_wb;
-	}
-
-	if (index > 0) {
-		/* see if we can find an already pending writeback that we can
-		 * append this page to */
-		list_for_each_entry(wb, &vnode->writebacks, link) {
-			if (wb->last == index - 1 && wb->key == key &&
-			    wb->state == AFS_WBACK_PENDING)
-				goto append_to_previous_wb;
+		wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link);
+
+	list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) {
+		_debug("wbk %u", key_serial(wbk->key));
+		if (key_validate(wbk->key) == 0) {
+			refcount_inc(&wbk->usage);
+			wreq->netfs_priv = key_get(wbk->key);
+			wreq->netfs_priv2 = wbk;
+			_debug("USE WB KEY %u", key_serial(wbk->key));
+			break;
 		}
 	}
 
-	list_add_tail(&candidate->link, &vnode->writebacks);
-	candidate->key = key_get(key);
-	spin_unlock(&vnode->writeback_lock);
-	SetPagePrivate(page);
-	set_page_private(page, (unsigned long) candidate);
-	_leave(" = 0 [new]");
-	return 0;
-
-subsume_in_current_wb:
-	_debug("subsume");
-	ASSERTRANGE(wb->first, <=, index, <=, wb->last);
-	if (index == wb->first && from < wb->offset_first)
-		wb->offset_first = from;
-	if (index == wb->last && to > wb->to_last)
-		wb->to_last = to;
-	spin_unlock(&vnode->writeback_lock);
-	kfree(candidate);
-	_leave(" = 0 [sub]");
-	return 0;
-
-append_to_previous_wb:
-	_debug("append into %lx-%lx", wb->first, wb->last);
-	wb->usage++;
-	wb->last++;
-	wb->to_last = to;
-	spin_unlock(&vnode->writeback_lock);
-	SetPagePrivate(page);
-	set_page_private(page, (unsigned long) wb);
-	kfree(candidate);
-	_leave(" = 0 [app]");
-	return 0;
-
-	/* the page is currently bound to another context, so if it's dirty we
-	 * need to flush it before we can use the new context */
-flush_conflicting_wb:
-	_debug("flush conflict");
-	if (wb->state == AFS_WBACK_PENDING)
-		wb->state = AFS_WBACK_CONFLICTING;
-	spin_unlock(&vnode->writeback_lock);
-	if (PageDirty(page)) {
-		ret = afs_write_back_from_locked_page(wb, page);
-		if (ret < 0) {
-			afs_put_writeback(candidate);
-			_leave(" = %d", ret);
-			return ret;
-		}
-	}
+	spin_unlock(&vnode->wb_lock);
 
-	/* the page holds a ref on the writeback record */
-	afs_put_writeback(wb);
-	set_page_private(page, 0);
-	ClearPagePrivate(page);
-	goto try_again;
+	afs_put_wb_key(old);
 }
 
-/*
- * finalise part of a write to a page
- */
-int afs_write_end(struct file *file, struct address_space *mapping,
-		  loff_t pos, unsigned len, unsigned copied,
-		  struct page *page, void *fsdata)
+static void afs_store_data_success(struct afs_operation *op)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	loff_t i_size, maybe_i_size;
-
-	_enter("{%x:%u},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, page->index);
-
-	maybe_i_size = pos + copied;
-
-	i_size = i_size_read(&vnode->vfs_inode);
-	if (maybe_i_size > i_size) {
-		spin_lock(&vnode->writeback_lock);
-		i_size = i_size_read(&vnode->vfs_inode);
-		if (maybe_i_size > i_size)
-			i_size_write(&vnode->vfs_inode, maybe_i_size);
-		spin_unlock(&vnode->writeback_lock);
+	struct afs_vnode *vnode = op->file[0].vnode;
+
+	op->ctime = op->file[0].scb.status.mtime_client;
+	afs_vnode_commit_status(op, &op->file[0]);
+	if (!afs_op_error(op)) {
+		afs_pages_written_back(vnode, op->store.pos, op->store.size);
+		afs_stat_v(vnode, n_stores);
+		atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
 	}
-
-	set_page_dirty(page);
-	if (PageDirty(page))
-		_debug("dirtied");
-	unlock_page(page);
-	page_cache_release(page);
-
-	return copied;
 }
 
-/*
- * kill all the pages in the given range
- */
-static void afs_kill_pages(struct afs_vnode *vnode, bool error,
-			   pgoff_t first, pgoff_t last)
-{
-	struct pagevec pv;
-	unsigned count, loop;
-
-	_enter("{%x:%u},%lx-%lx",
-	       vnode->fid.vid, vnode->fid.vnode, first, last);
-
-	pagevec_init(&pv, 0);
-
-	do {
-		_debug("kill %lx-%lx", first, last);
-
-		count = last - first + 1;
-		if (count > PAGEVEC_SIZE)
-			count = PAGEVEC_SIZE;
-		pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
-					      first, count, pv.pages);
-		ASSERTCMP(pv.nr, ==, count);
-
-		for (loop = 0; loop < count; loop++) {
-			ClearPageUptodate(pv.pages[loop]);
-			if (error)
-				SetPageError(pv.pages[loop]);
-			end_page_writeback(pv.pages[loop]);
-		}
-
-		__pagevec_release(&pv);
-	} while (first < last);
-
-	_leave("");
-}
+static const struct afs_operation_ops afs_store_data_operation = {
+	.issue_afs_rpc	= afs_fs_store_data,
+	.issue_yfs_rpc	= yfs_fs_store_data,
+	.success	= afs_store_data_success,
+};
 
 /*
- * synchronously write back the locked page and any subsequent non-locked dirty
- * pages also covered by the same writeback record
+ * Prepare a subrequest to write to the server.  This sets the max_len
+ * parameter.
  */
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
-					   struct page *primary_page)
+void afs_prepare_write(struct netfs_io_subrequest *subreq)
 {
-	struct page *pages[8], *page;
-	unsigned long count;
-	unsigned n, offset, to;
-	pgoff_t start, first, last;
-	int loop, ret;
-
-	_enter(",%lx", primary_page->index);
-
-	count = 1;
-	if (!clear_page_dirty_for_io(primary_page))
-		BUG();
-	if (test_set_page_writeback(primary_page))
-		BUG();
-
-	/* find all consecutive lockable dirty pages, stopping when we find a
-	 * page that is not immediately lockable, is not dirty or is missing,
-	 * or we reach the end of the range */
-	start = primary_page->index;
-	if (start >= wb->last)
-		goto no_more;
-	start++;
-	do {
-		_debug("more %lx [%lx]", start, count);
-		n = wb->last - start + 1;
-		if (n > ARRAY_SIZE(pages))
-			n = ARRAY_SIZE(pages);
-		n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
-					  start, n, pages);
-		_debug("fgpc %u", n);
-		if (n == 0)
-			goto no_more;
-		if (pages[0]->index != start) {
-			do {
-				put_page(pages[--n]);
-			} while (n > 0);
-			goto no_more;
-		}
-
-		for (loop = 0; loop < n; loop++) {
-			page = pages[loop];
-			if (page->index > wb->last)
-				break;
-			if (!trylock_page(page))
-				break;
-			if (!PageDirty(page) ||
-			    page_private(page) != (unsigned long) wb) {
-				unlock_page(page);
-				break;
-			}
-			if (!clear_page_dirty_for_io(page))
-				BUG();
-			if (test_set_page_writeback(page))
-				BUG();
-			unlock_page(page);
-			put_page(page);
-		}
-		count += loop;
-		if (loop < n) {
-			for (; loop < n; loop++)
-				put_page(pages[loop]);
-			goto no_more;
-		}
-
-		start += loop;
-	} while (start <= wb->last && count < 65536);
-
-no_more:
-	/* we now have a contiguous set of dirty pages, each with writeback set
-	 * and the dirty mark cleared; the first page is locked and must remain
-	 * so, all the rest are unlocked */
-	first = primary_page->index;
-	last = first + count - 1;
-
-	offset = (first == wb->first) ? wb->offset_first : 0;
-	to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
-
-	_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
-
-	ret = afs_vnode_store_data(wb, first, last, offset, to);
-	if (ret < 0) {
-		switch (ret) {
-		case -EDQUOT:
-		case -ENOSPC:
-			set_bit(AS_ENOSPC,
-				&wb->vnode->vfs_inode.i_mapping->flags);
-			break;
-		case -EROFS:
-		case -EIO:
-		case -EREMOTEIO:
-		case -EFBIG:
-		case -ENOENT:
-		case -ENOMEDIUM:
-		case -ENXIO:
-			afs_kill_pages(wb->vnode, true, first, last);
-			set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
-			break;
-		case -EACCES:
-		case -EPERM:
-		case -ENOKEY:
-		case -EKEYEXPIRED:
-		case -EKEYREJECTED:
-		case -EKEYREVOKED:
-			afs_kill_pages(wb->vnode, false, first, last);
-			break;
-		default:
-			break;
-		}
-	} else {
-		ret = count;
-	}
+	struct netfs_io_stream *stream = &subreq->rreq->io_streams[subreq->stream_nr];
 
-	_leave(" = %d", ret);
-	return ret;
+	//if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags))
+	//	subreq->max_len = 512 * 1024;
+	//else
+	stream->sreq_max_len = 256 * 1024 * 1024;
 }
 
 /*
- * write a page back to the server
- * - the caller locked the page for us
+ * Issue a subrequest to write to the server.
  */
-int afs_writepage(struct page *page, struct writeback_control *wbc)
+static void afs_issue_write_worker(struct work_struct *work)
 {
-	struct afs_writeback *wb;
-	int ret;
-
-	_enter("{%lx},", page->index);
-
-	wb = (struct afs_writeback *) page_private(page);
-	ASSERT(wb != NULL);
-
-	ret = afs_write_back_from_locked_page(wb, page);
-	unlock_page(page);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return 0;
+	struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+	unsigned long long pos = subreq->start + subreq->transferred;
+	size_t len = subreq->len - subreq->transferred;
+	int ret = -ENOKEY;
+
+	_enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx",
+	       wreq->debug_id, subreq->debug_index,
+	       vnode->volume->name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       pos, len);
+
+#if 0 // Error injection
+	if (subreq->debug_index == 3)
+		return netfs_write_subrequest_terminated(subreq, -ENOANO);
+
+	if (!subreq->retry_count) {
+		set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+		return netfs_write_subrequest_terminated(subreq, -EAGAIN);
 	}
+#endif
 
-	wbc->nr_to_write -= ret;
-
-	_leave(" = 0");
-	return 0;
-}
-
-/*
- * write a region of pages back to the server
- */
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 pgoff_t index, pgoff_t end, pgoff_t *_next)
-{
-	struct afs_writeback *wb;
-	struct page *page;
-	int ret, n;
-
-	_enter(",,%lx,%lx,", index, end);
-
-	do {
-		n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
-				       1, &page);
-		if (!n)
-			break;
-
-		_debug("wback %lx", page->index);
-
-		if (page->index > end) {
-			*_next = index;
-			page_cache_release(page);
-			_leave(" = 0 [%lx]", *_next);
-			return 0;
-		}
-
-		/* at this point we hold neither mapping->tree_lock nor lock on
-		 * the page itself: the page may be truncated or invalidated
-		 * (changing page->mapping to NULL), or even swizzled back from
-		 * swapper_space to tmpfs file mapping
+	op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
+	if (IS_ERR(op))
+		return netfs_write_subrequest_terminated(subreq, -EAGAIN);
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->file[0].dv_delta	= 1;
+	op->file[0].modification = true;
+	op->store.pos		= pos;
+	op->store.size		= len;
+	op->flags		|= AFS_OPERATION_UNINTR;
+	op->ops			= &afs_store_data_operation;
+
+	afs_begin_vnode_operation(op);
+
+	op->store.write_iter	= &subreq->io_iter;
+	op->store.i_size	= umax(pos + len, vnode->netfs.remote_i_size);
+	op->mtime		= inode_get_mtime(&vnode->netfs.inode);
+
+	afs_wait_for_operation(op);
+	ret = afs_put_operation(op);
+	switch (ret) {
+	case 0:
+		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+		break;
+	case -EACCES:
+	case -EPERM:
+	case -ENOKEY:
+	case -EKEYEXPIRED:
+	case -EKEYREJECTED:
+	case -EKEYREVOKED:
+		/* If there are more keys we can try, use the retry algorithm
+		 * to rotate the keys.
 		 */
-		lock_page(page);
-
-		if (page->mapping != mapping) {
-			unlock_page(page);
-			page_cache_release(page);
-			continue;
-		}
-
-		if (wbc->sync_mode != WB_SYNC_NONE)
-			wait_on_page_writeback(page);
-
-		if (PageWriteback(page) || !PageDirty(page)) {
-			unlock_page(page);
-			continue;
-		}
-
-		wb = (struct afs_writeback *) page_private(page);
-		ASSERT(wb != NULL);
-
-		spin_lock(&wb->vnode->writeback_lock);
-		wb->state = AFS_WBACK_WRITING;
-		spin_unlock(&wb->vnode->writeback_lock);
-
-		ret = afs_write_back_from_locked_page(wb, page);
-		unlock_page(page);
-		page_cache_release(page);
-		if (ret < 0) {
-			_leave(" = %d", ret);
-			return ret;
-		}
-
-		wbc->nr_to_write -= ret;
-
-		cond_resched();
-	} while (index < end && wbc->nr_to_write > 0);
+		if (wreq->netfs_priv2)
+			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+		break;
+	}
 
-	*_next = index;
-	_leave(" = 0 [%lx]", *_next);
-	return 0;
+	netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len);
 }
 
-/*
- * write some of the pending data back to the server
- */
-int afs_writepages(struct address_space *mapping,
-		   struct writeback_control *wbc)
+void afs_issue_write(struct netfs_io_subrequest *subreq)
 {
-	pgoff_t start, end, next;
-	int ret;
-
-	_enter("");
-
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index;
-		end = -1;
-		ret = afs_writepages_region(mapping, wbc, start, end, &next);
-		if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
-			ret = afs_writepages_region(mapping, wbc, 0, start,
-						    &next);
-		mapping->writeback_index = next;
-	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
-		ret = afs_writepages_region(mapping, wbc, 0, end, &next);
-		if (wbc->nr_to_write > 0)
-			mapping->writeback_index = next;
-	} else {
-		start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		ret = afs_writepages_region(mapping, wbc, start, end, &next);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
+	subreq->work.func = afs_issue_write_worker;
+	if (!queue_work(system_dfl_wq, &subreq->work))
+		WARN_ON_ONCE(1);
 }
 
 /*
- * completion of write to server
+ * Writeback calls this when it finds a folio that needs uploading.  This isn't
+ * called if writeback only has copy-to-cache to deal with.
  */
-void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
+void afs_begin_writeback(struct netfs_io_request *wreq)
 {
-	struct afs_writeback *wb = call->wb;
-	struct pagevec pv;
-	unsigned count, loop;
-	pgoff_t first = call->first, last = call->last;
-	bool free_wb;
-
-	_enter("{%x:%u},{%lx-%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, first, last);
-
-	ASSERT(wb != NULL);
-
-	pagevec_init(&pv, 0);
-
-	do {
-		_debug("done %lx-%lx", first, last);
-
-		count = last - first + 1;
-		if (count > PAGEVEC_SIZE)
-			count = PAGEVEC_SIZE;
-		pv.nr = find_get_pages_contig(call->mapping, first, count,
-					      pv.pages);
-		ASSERTCMP(pv.nr, ==, count);
-
-		spin_lock(&vnode->writeback_lock);
-		for (loop = 0; loop < count; loop++) {
-			struct page *page = pv.pages[loop];
-			end_page_writeback(page);
-			if (page_private(page) == (unsigned long) wb) {
-				set_page_private(page, 0);
-				ClearPagePrivate(page);
-				wb->usage--;
-			}
-		}
-		free_wb = false;
-		if (wb->usage == 0) {
-			afs_unlink_writeback(wb);
-			free_wb = true;
-		}
-		spin_unlock(&vnode->writeback_lock);
-		first += count;
-		if (free_wb) {
-			afs_free_writeback(wb);
-			wb = NULL;
-		}
-
-		__pagevec_release(&pv);
-	} while (first <= last);
-
-	_leave("");
+	if (S_ISREG(wreq->inode->i_mode))
+		afs_get_writeback_key(wreq);
 }
 
 /*
- * write to an AFS file
+ * Prepare to retry the writes in request.  Use this to try rotating the
+ * available writeback keys.
  */
-ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
-	ssize_t result;
-	size_t count = iov_length(iov, nr_segs);
-
-	_enter("{%x.%u},{%zu},%lu,",
-	       vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
-
-	if (IS_SWAPFILE(&vnode->vfs_inode)) {
-		printk(KERN_INFO
-		       "AFS: Attempt to write to active swap file!\n");
-		return -EBUSY;
+	struct netfs_io_subrequest *subreq =
+		list_first_entry(&stream->subrequests,
+				 struct netfs_io_subrequest, rreq_link);
+
+	switch (wreq->origin) {
+	case NETFS_READAHEAD:
+	case NETFS_READPAGE:
+	case NETFS_READ_GAPS:
+	case NETFS_READ_SINGLE:
+	case NETFS_READ_FOR_WRITE:
+	case NETFS_UNBUFFERED_READ:
+	case NETFS_DIO_READ:
+		return;
+	default:
+		break;
 	}
 
-	if (!count)
-		return 0;
-
-	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
-	if (IS_ERR_VALUE(result)) {
-		_leave(" = %zd", result);
-		return result;
+	switch (subreq->error) {
+	case -EACCES:
+	case -EPERM:
+	case -ENOKEY:
+	case -EKEYEXPIRED:
+	case -EKEYREJECTED:
+	case -EKEYREVOKED:
+		afs_get_writeback_key(wreq);
+		if (!wreq->netfs_priv)
+			stream->failed = true;
+		break;
 	}
-
-	_leave(" = %zd", result);
-	return result;
 }
 
 /*
- * flush the vnode to the fileserver
+ * write some of the pending data back to the server
  */
-int afs_writeback_all(struct afs_vnode *vnode)
+int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	struct address_space *mapping = vnode->vfs_inode.i_mapping;
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_to_write	= LONG_MAX,
-		.range_cyclic	= 1,
-	};
+	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	int ret;
 
-	_enter("");
-
-	ret = mapping->a_ops->writepages(mapping, &wbc);
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	/* We have to be careful as we can end up racing with setattr()
+	 * truncating the pagecache since the caller doesn't take a lock here
+	 * to prevent it.
+	 */
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		down_read(&vnode->validate_lock);
+	else if (!down_read_trylock(&vnode->validate_lock))
+		return 0;
 
-	_leave(" = %d", ret);
+	ret = netfs_writepages(mapping, wbc);
+	up_read(&vnode->validate_lock);
 	return ret;
 }
 
@@ -683,82 +252,58 @@ int afs_writeback_all(struct afs_vnode *vnode)
  */
 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = file->f_mapping->host;
-	struct afs_writeback *wb, *xwb;
-	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct afs_file *af = file->private_data;
 	int ret;
 
-	_enter("{%x:%u},{n=%s},%d",
-	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+	_enter("{%llx:%llu},{n=%pD},%d",
+	       vnode->fid.vid, vnode->fid.vnode, file,
 	       datasync);
 
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret)
+	ret = afs_validate(vnode, af->key);
+	if (ret < 0)
 		return ret;
-	mutex_lock(&inode->i_mutex);
-
-	/* use a writeback record as a marker in the queue - when this reaches
-	 * the front of the queue, all the outstanding writes are either
-	 * completed or rejected */
-	wb = kzalloc(sizeof(*wb), GFP_KERNEL);
-	if (!wb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	wb->vnode = vnode;
-	wb->first = 0;
-	wb->last = -1;
-	wb->offset_first = 0;
-	wb->to_last = PAGE_SIZE;
-	wb->usage = 1;
-	wb->state = AFS_WBACK_SYNCING;
-	init_waitqueue_head(&wb->waitq);
-
-	spin_lock(&vnode->writeback_lock);
-	list_for_each_entry(xwb, &vnode->writebacks, link) {
-		if (xwb->state == AFS_WBACK_PENDING)
-			xwb->state = AFS_WBACK_CONFLICTING;
-	}
-	list_add_tail(&wb->link, &vnode->writebacks);
-	spin_unlock(&vnode->writeback_lock);
-
-	/* push all the outstanding writebacks to the server */
-	ret = afs_writeback_all(vnode);
-	if (ret < 0) {
-		afs_put_writeback(wb);
-		_leave(" = %d [wb]", ret);
-		goto out;
-	}
 
-	/* wait for the preceding writes to actually complete */
-	ret = wait_event_interruptible(wb->waitq,
-				       wb->state == AFS_WBACK_COMPLETE ||
-				       vnode->writebacks.next == &wb->link);
-	afs_put_writeback(wb);
-	_leave(" = %d", ret);
-out:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
+	return file_write_and_wait_range(file, start, end);
 }
 
 /*
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
  */
-int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
-	struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
+	struct file *file = vmf->vma->vm_file;
 
-	_enter("{{%x:%u}},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, page->index);
+	if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0)
+		return VM_FAULT_SIGBUS;
+	return netfs_page_mkwrite(vmf, NULL);
+}
 
-	/* wait for the page to be written to the cache before we allow it to
-	 * be modified */
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_wait_on_page_write(vnode->cache, page);
-#endif
+/*
+ * Prune the keys cached for writeback.  The caller must hold vnode->wb_lock.
+ */
+void afs_prune_wb_keys(struct afs_vnode *vnode)
+{
+	LIST_HEAD(graveyard);
+	struct afs_wb_key *wbk, *tmp;
 
-	_leave(" = 0");
-	return 0;
+	/* Discard unused keys */
+	spin_lock(&vnode->wb_lock);
+
+	if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
+	    !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) {
+		list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
+			if (refcount_read(&wbk->usage) == 1)
+				list_move(&wbk->vnode_link, &graveyard);
+		}
+	}
+
+	spin_unlock(&vnode->wb_lock);
+
+	while (!list_empty(&graveyard)) {
+		wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
+		list_del(&wbk->vnode_link);
+		afs_put_wb_key(wbk);
+	}
 }
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
new file mode 100644
index 000000000000..e19f396aa370
--- /dev/null
+++ b/fs/afs/xattr.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Extended attribute handling for AFS.  We use xattrs to get and set metadata
+ * instead of providing pioctl().
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/xattr.h>
+#include "internal.h"
+
+/*
+ * Deal with the result of a successful fetch ACL operation.
+ */
+static void afs_acl_success(struct afs_operation *op)
+{
+	afs_vnode_commit_status(op, &op->file[0]);
+}
+
+static void afs_acl_put(struct afs_operation *op)
+{
+	kfree(op->acl);
+}
+
+static const struct afs_operation_ops afs_fetch_acl_operation = {
+	.issue_afs_rpc	= afs_fs_fetch_acl,
+	.success	= afs_acl_success,
+	.put		= afs_acl_put,
+};
+
+/*
+ * Get a file's ACL.
+ */
+static int afs_xattr_get_acl(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_acl *acl = NULL;
+	int ret;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->ops = &afs_fetch_acl_operation;
+
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+	acl = op->acl;
+	op->acl = NULL;
+	ret = afs_put_operation(op);
+
+	if (ret == 0) {
+		ret = acl->size;
+		if (size > 0) {
+			if (acl->size <= size)
+				memcpy(buffer, acl->data, acl->size);
+			else
+				ret = -ERANGE;
+		}
+	}
+
+	kfree(acl);
+	return ret;
+}
+
+static bool afs_make_acl(struct afs_operation *op,
+			 const void *buffer, size_t size)
+{
+	struct afs_acl *acl;
+
+	acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+	if (!acl) {
+		afs_op_nomem(op);
+		return false;
+	}
+
+	acl->size = size;
+	memcpy(acl->data, buffer, size);
+	op->acl = acl;
+	return true;
+}
+
+static const struct afs_operation_ops afs_store_acl_operation = {
+	.issue_afs_rpc	= afs_fs_store_acl,
+	.success	= afs_acl_success,
+	.put		= afs_acl_put,
+};
+
+/*
+ * Set a file's AFS3 ACL.
+ */
+static int afs_xattr_set_acl(const struct xattr_handler *handler,
+			     struct mnt_idmap *idmap,
+                             struct dentry *dentry,
+                             struct inode *inode, const char *name,
+                             const void *buffer, size_t size, int flags)
+{
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	if (flags == XATTR_CREATE)
+		return -EINVAL;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	if (!afs_make_acl(op, buffer, size))
+		return afs_put_operation(op);
+
+	op->ops = &afs_store_acl_operation;
+	return afs_do_sync_operation(op);
+}
+
+static const struct xattr_handler afs_xattr_afs_acl_handler = {
+	.name   = "afs.acl",
+	.get    = afs_xattr_get_acl,
+	.set    = afs_xattr_set_acl,
+};
+
+static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = {
+	.issue_yfs_rpc	= yfs_fs_fetch_opaque_acl,
+	.success	= afs_acl_success,
+	/* Don't free op->yacl in .put here */
+};
+
+/*
+ * Get a file's YFS ACL.
+ */
+static int afs_xattr_get_yfs(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct yfs_acl *yacl = NULL;
+	char buf[16], *data;
+	int which = 0, dsize, ret = -ENOMEM;
+
+	if (strcmp(name, "acl") == 0)
+		which = 0;
+	else if (strcmp(name, "acl_inherited") == 0)
+		which = 1;
+	else if (strcmp(name, "acl_num_cleaned") == 0)
+		which = 2;
+	else if (strcmp(name, "vol_acl") == 0)
+		which = 3;
+	else
+		return -EOPNOTSUPP;
+
+	yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
+	if (!yacl)
+		goto error;
+
+	if (which == 0)
+		yacl->flags |= YFS_ACL_WANT_ACL;
+	else if (which == 3)
+		yacl->flags |= YFS_ACL_WANT_VOL_ACL;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		goto error_yacl;
+
+	afs_op_set_vnode(op, 0, vnode);
+	op->yacl = yacl;
+	op->ops = &yfs_fetch_opaque_acl_operation;
+
+	afs_begin_vnode_operation(op);
+	afs_wait_for_operation(op);
+	ret = afs_put_operation(op);
+
+	if (ret == 0) {
+		switch (which) {
+		case 0:
+			data = yacl->acl->data;
+			dsize = yacl->acl->size;
+			break;
+		case 1:
+			data = buf;
+			dsize = scnprintf(buf, sizeof(buf), "%u", yacl->inherit_flag);
+			break;
+		case 2:
+			data = buf;
+			dsize = scnprintf(buf, sizeof(buf), "%u", yacl->num_cleaned);
+			break;
+		case 3:
+			data = yacl->vol_acl->data;
+			dsize = yacl->vol_acl->size;
+			break;
+		default:
+			ret = -EOPNOTSUPP;
+			goto error_yacl;
+		}
+
+		ret = dsize;
+		if (size > 0) {
+			if (dsize <= size)
+				memcpy(buffer, data, dsize);
+			else
+				ret = -ERANGE;
+		}
+	} else if (ret == -ENOTSUPP) {
+		ret = -ENODATA;
+	}
+
+error_yacl:
+	yfs_free_opaque_acl(yacl);
+error:
+	return ret;
+}
+
+static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
+	.issue_yfs_rpc	= yfs_fs_store_opaque_acl2,
+	.success	= afs_acl_success,
+	.put		= afs_acl_put,
+};
+
+/*
+ * Set a file's YFS ACL.
+ */
+static int afs_xattr_set_yfs(const struct xattr_handler *handler,
+			     struct mnt_idmap *idmap,
+                             struct dentry *dentry,
+                             struct inode *inode, const char *name,
+                             const void *buffer, size_t size, int flags)
+{
+	struct afs_operation *op;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	int ret;
+
+	if (flags == XATTR_CREATE ||
+	    strcmp(name, "acl") != 0)
+		return -EINVAL;
+
+	op = afs_alloc_operation(NULL, vnode->volume);
+	if (IS_ERR(op))
+		return -ENOMEM;
+
+	afs_op_set_vnode(op, 0, vnode);
+	if (!afs_make_acl(op, buffer, size))
+		return afs_put_operation(op);
+
+	op->ops = &yfs_store_opaque_acl2_operation;
+	ret = afs_do_sync_operation(op);
+	if (ret == -ENOTSUPP)
+		ret = -ENODATA;
+	return ret;
+}
+
+static const struct xattr_handler afs_xattr_yfs_handler = {
+	.prefix	= "afs.yfs.",
+	.get	= afs_xattr_get_yfs,
+	.set	= afs_xattr_set_yfs,
+};
+
+/*
+ * Get the name of the cell on which a file resides.
+ */
+static int afs_xattr_get_cell(const struct xattr_handler *handler,
+			      struct dentry *dentry,
+			      struct inode *inode, const char *name,
+			      void *buffer, size_t size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct afs_cell *cell = vnode->volume->cell;
+	size_t namelen;
+
+	namelen = cell->name_len;
+	if (size == 0)
+		return namelen;
+	if (namelen > size)
+		return -ERANGE;
+	memcpy(buffer, cell->name, namelen);
+	return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_cell_handler = {
+	.name	= "afs.cell",
+	.get	= afs_xattr_get_cell,
+};
+
+/*
+ * Get the volume ID, vnode ID and vnode uniquifier of a file as a sequence of
+ * hex numbers separated by colons.
+ */
+static int afs_xattr_get_fid(const struct xattr_handler *handler,
+			     struct dentry *dentry,
+			     struct inode *inode, const char *name,
+			     void *buffer, size_t size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	char text[16 + 1 + 24 + 1 + 8 + 1];
+	size_t len;
+
+	/* The volume ID is 64-bit, the vnode ID is 96-bit and the
+	 * uniquifier is 32-bit.
+	 */
+	len = scnprintf(text, sizeof(text), "%llx:", vnode->fid.vid);
+	if (vnode->fid.vnode_hi)
+		len += scnprintf(text + len, sizeof(text) - len, "%x%016llx",
+				vnode->fid.vnode_hi, vnode->fid.vnode);
+	else
+		len += scnprintf(text + len, sizeof(text) - len, "%llx",
+				 vnode->fid.vnode);
+	len += scnprintf(text + len, sizeof(text) - len, ":%x",
+			 vnode->fid.unique);
+
+	if (size == 0)
+		return len;
+	if (len > size)
+		return -ERANGE;
+	memcpy(buffer, text, len);
+	return len;
+}
+
+static const struct xattr_handler afs_xattr_afs_fid_handler = {
+	.name	= "afs.fid",
+	.get	= afs_xattr_get_fid,
+};
+
+/*
+ * Get the name of the volume on which a file resides.
+ */
+static int afs_xattr_get_volume(const struct xattr_handler *handler,
+			      struct dentry *dentry,
+			      struct inode *inode, const char *name,
+			      void *buffer, size_t size)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	const char *volname = vnode->volume->name;
+	size_t namelen;
+
+	namelen = strlen(volname);
+	if (size == 0)
+		return namelen;
+	if (namelen > size)
+		return -ERANGE;
+	memcpy(buffer, volname, namelen);
+	return namelen;
+}
+
+static const struct xattr_handler afs_xattr_afs_volume_handler = {
+	.name	= "afs.volume",
+	.get	= afs_xattr_get_volume,
+};
+
+const struct xattr_handler * const afs_xattr_handlers[] = {
+	&afs_xattr_afs_acl_handler,
+	&afs_xattr_afs_cell_handler,
+	&afs_xattr_afs_fid_handler,
+	&afs_xattr_afs_volume_handler,
+	&afs_xattr_yfs_handler,		/* afs.yfs. prefix */
+	NULL
+};
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
new file mode 100644
index 000000000000..cc5f143d21a3
--- /dev/null
+++ b/fs/afs/xdr_fs.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS fileserver XDR types
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef XDR_FS_H
+#define XDR_FS_H
+
+struct afs_xdr_AFSFetchStatus {
+	__be32	if_version;
+#define AFS_FSTATUS_VERSION	1
+	__be32	type;
+	__be32	nlink;
+	__be32	size_lo;
+	__be32	data_version_lo;
+	__be32	author;
+	__be32	owner;
+	__be32	caller_access;
+	__be32	anon_access;
+	__be32	mode;
+	__be32	parent_vnode;
+	__be32	parent_unique;
+	__be32	seg_size;
+	__be32	mtime_client;
+	__be32	mtime_server;
+	__be32	group;
+	__be32	sync_counter;
+	__be32	data_version_hi;
+	__be32	lock_count;
+	__be32	size_hi;
+	__be32	abort_code;
+} __packed;
+
+#define AFS_DIR_HASHTBL_SIZE	128
+#define AFS_DIR_DIRENT_SIZE	32
+#define AFS_DIR_SLOTS_PER_BLOCK	64
+#define AFS_DIR_BLOCK_SIZE	2048
+#define AFS_DIR_BLOCKS_PER_PAGE	(PAGE_SIZE / AFS_DIR_BLOCK_SIZE)
+#define AFS_DIR_MAX_SLOTS	65536
+#define AFS_DIR_BLOCKS_WITH_CTR	128
+#define AFS_DIR_MAX_BLOCKS	1023
+#define AFS_DIR_RESV_BLOCKS	1
+#define AFS_DIR_RESV_BLOCKS0	13
+
+/*
+ * Directory entry structure.
+ */
+union afs_xdr_dirent {
+	struct {
+		u8		valid;
+		u8		unused[1];
+		__be16		hash_next;
+		__be32		vnode;
+		__be32		unique;
+		u8		name[];
+		/* When determining the number of dirent slots needed to
+		 * represent a directory entry, name should be assumed to be 16
+		 * bytes, due to a now-standardised (mis)calculation, but it is
+		 * in fact 20 bytes in size.  afs_dir_calc_slots() should be
+		 * used for this.
+		 *
+		 * For names longer than (16 or) 20 bytes, extra slots should
+		 * be annexed to this one using the extended_name format.
+		 */
+	} u;
+	u8			extended_name[32];
+} __packed;
+
+/*
+ * Directory block header (one at the beginning of every 2048-byte block).
+ */
+struct afs_xdr_dir_hdr {
+	__be16		npages;
+	__be16		magic;
+#define AFS_DIR_MAGIC htons(1234)
+	u8		reserved;
+	u8		bitmap[8];
+	u8		pad[19];
+} __packed;
+
+/*
+ * Directory block layout
+ */
+union afs_xdr_dir_block {
+	struct afs_xdr_dir_hdr		hdr;
+
+	struct {
+		struct afs_xdr_dir_hdr	hdr;
+		u8			alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
+		__be16			hashtable[AFS_DIR_HASHTBL_SIZE];
+	} meta;
+
+	union afs_xdr_dirent	dirents[AFS_DIR_SLOTS_PER_BLOCK];
+} __packed;
+
+/*
+ * Directory layout on a linux VM page.
+ */
+struct afs_xdr_dir_page {
+	union afs_xdr_dir_block	blocks[AFS_DIR_BLOCKS_PER_PAGE];
+};
+
+/*
+ * Calculate the number of dirent slots required for any given name length.
+ * The calculation is made assuming the part of the name in the first slot is
+ * 16 bytes, rather than 20, but this miscalculation is now standardised.
+ */
+static inline unsigned int afs_dir_calc_slots(size_t name_len)
+{
+	name_len++; /* NUL-terminated */
+	return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE);
+}
+
+#endif /* XDR_FS_H */
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
new file mode 100644
index 000000000000..febf13a49f0b
--- /dev/null
+++ b/fs/afs/yfsclient.c
@@ -0,0 +1,2232 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* YFS File Server client stubs
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+#include "protocol_yfs.h"
+
+#define xdr_size(x) (sizeof(*x) / sizeof(__be32))
+
+static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const struct yfs_xdr_YFSFid *x = (const void *)*_bp;
+
+	fid->vid	= xdr_to_u64(x->volume);
+	fid->vnode	= xdr_to_u64(x->vnode.lo);
+	fid->vnode_hi	= ntohl(x->vnode.hi);
+	fid->unique	= ntohl(x->vnode.unique);
+	*_bp += xdr_size(x);
+}
+
+static __be32 *xdr_encode_u32(__be32 *bp, u32 n)
+{
+	*bp++ = htonl(n);
+	return bp;
+}
+
+static __be32 *xdr_encode_u64(__be32 *bp, u64 n)
+{
+	struct yfs_xdr_u64 *x = (void *)bp;
+
+	*x = u64_to_xdr(n);
+	return bp + xdr_size(x);
+}
+
+static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid)
+{
+	struct yfs_xdr_YFSFid *x = (void *)bp;
+
+	x->volume	= u64_to_xdr(fid->vid);
+	x->vnode.lo	= u64_to_xdr(fid->vnode);
+	x->vnode.hi	= htonl(fid->vnode_hi);
+	x->vnode.unique	= htonl(fid->unique);
+	return bp + xdr_size(x);
+}
+
+static size_t xdr_strlen(unsigned int len)
+{
+	return sizeof(__be32) + round_up(len, sizeof(__be32));
+}
+
+static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len)
+{
+	bp = xdr_encode_u32(bp, len);
+	bp = memcpy(bp, p, len);
+	if (len & 3) {
+		unsigned int pad = 4 - (len & 3);
+
+		memset((u8 *)bp + len, 0, pad);
+		len += pad;
+	}
+
+	return bp + len / sizeof(__be32);
+}
+
+static __be32 *xdr_encode_name(__be32 *bp, const struct qstr *p)
+{
+	return xdr_encode_string(bp, p->name, p->len);
+}
+
+static s64 linux_to_yfs_time(const struct timespec64 *t)
+{
+	/* Convert to 100ns intervals. */
+	return (u64)t->tv_sec * 10000000 + t->tv_nsec/100;
+}
+
+static __be32 *xdr_encode_YFSStoreStatus(__be32 *bp, mode_t *mode,
+					 const struct timespec64 *t)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	mode_t masked_mode = mode ? *mode & S_IALLUGO : 0;
+	s64 mtime = linux_to_yfs_time(t);
+	u32 mask = AFS_SET_MTIME;
+
+	mask |= mode ? AFS_SET_MODE : 0;
+
+	x->mask		= htonl(mask);
+	x->mode		= htonl(masked_mode);
+	x->mtime_client	= u64_to_xdr(mtime);
+	x->owner	= u64_to_xdr(0);
+	x->group	= u64_to_xdr(0);
+	return bp + xdr_size(x);
+}
+
+/*
+ * Convert a signed 100ns-resolution 64-bit time into a timespec.
+ */
+static struct timespec64 yfs_time_to_linux(s64 t)
+{
+	struct timespec64 ts;
+	u64 abs_t;
+
+	/*
+	 * Unfortunately can not use normal 64 bit division on 32 bit arch, but
+	 * the alternative, do_div, does not work with negative numbers so have
+	 * to special case them
+	 */
+	if (t < 0) {
+		abs_t = -t;
+		ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100);
+		ts.tv_nsec = -ts.tv_nsec;
+		ts.tv_sec = -abs_t;
+	} else {
+		abs_t = t;
+		ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100;
+		ts.tv_sec = abs_t;
+	}
+
+	return ts;
+}
+
+static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr)
+{
+	s64 t = xdr_to_u64(xdr);
+
+	return yfs_time_to_linux(t);
+}
+
+static void yfs_check_req(struct afs_call *call, __be32 *bp)
+{
+	size_t len = (void *)bp - call->request;
+
+	if (len > call->request_size)
+		pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n",
+		       call->type->name, len, call->request_size);
+	else if (len < call->request_size)
+		pr_warn("kAFS: %s: Request buffer underflow (%zu<%u)\n",
+			call->type->name, len, call->request_size);
+}
+
+/*
+ * Dump a bad file status record.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 x[4];
+	int i;
+
+	pr_notice("YFS XDR: Bad status record\n");
+	for (i = 0; i < 6 * 4 * 4; i += 16) {
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	memcpy(x, bp, 8);
+	pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1]));
+}
+
+/*
+ * Decode a YFSFetchStatus block
+ */
+static void xdr_decode_YFSFetchStatus(const __be32 **_bp,
+				      struct afs_call *call,
+				      struct afs_status_cb *scb)
+{
+	const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+	struct afs_file_status *status = &scb->status;
+	u32 type;
+
+	status->abort_code = ntohl(xdr->abort_code);
+	if (status->abort_code != 0) {
+		if (status->abort_code == VNOVNODE)
+			status->nlink = 0;
+		scb->have_error = true;
+		goto advance;
+	}
+
+	type = ntohl(xdr->type);
+	switch (type) {
+	case AFS_FTYPE_FILE:
+	case AFS_FTYPE_DIR:
+	case AFS_FTYPE_SYMLINK:
+		status->type = type;
+		break;
+	default:
+		goto bad;
+	}
+
+	status->nlink		= ntohl(xdr->nlink);
+	status->author		= xdr_to_u64(xdr->author);
+	status->owner		= xdr_to_u64(xdr->owner);
+	status->caller_access	= ntohl(xdr->caller_access); /* Ticket dependent */
+	status->anon_access	= ntohl(xdr->anon_access);
+	status->mode		= ntohl(xdr->mode) & S_IALLUGO;
+	status->group		= xdr_to_u64(xdr->group);
+	status->lock_count	= ntohl(xdr->lock_count);
+
+	status->mtime_client	= xdr_to_time(xdr->mtime_client);
+	status->mtime_server	= xdr_to_time(xdr->mtime_server);
+	status->size		= xdr_to_u64(xdr->size);
+	status->data_version	= xdr_to_u64(xdr->data_version);
+	scb->have_status	= true;
+advance:
+	*_bp += xdr_size(xdr);
+	return;
+
+bad:
+	xdr_dump_bad(*_bp);
+	afs_protocol_error(call, afs_eproto_bad_status);
+	goto advance;
+}
+
+/*
+ * Decode a YFSCallBack block
+ */
+static void xdr_decode_YFSCallBack(const __be32 **_bp,
+				   struct afs_call *call,
+				   struct afs_status_cb *scb)
+{
+	struct yfs_xdr_YFSCallBack *x = (void *)*_bp;
+	struct afs_callback *cb = &scb->callback;
+	ktime_t cb_expiry;
+
+	cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100);
+	cb->expires_at	= ktime_divns(cb_expiry, NSEC_PER_SEC);
+	scb->have_cb	= true;
+	*_bp += xdr_size(x);
+}
+
+/*
+ * Decode a YFSVolSync block
+ */
+static void xdr_decode_YFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
+{
+	struct yfs_xdr_YFSVolSync *x = (void *)*_bp;
+	u64 creation, update;
+
+	if (volsync) {
+		creation = xdr_to_u64(x->vol_creation_date);
+		do_div(creation, 10 * 1000 * 1000);
+		volsync->creation = creation;
+		update = xdr_to_u64(x->vol_update_date);
+		do_div(update, 10 * 1000 * 1000);
+		volsync->update = update;
+	}
+
+	*_bp += xdr_size(x);
+}
+
+/*
+ * Encode the requested attributes into a YFSStoreStatus block
+ */
+static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	s64 mtime = 0, owner = 0, group = 0;
+	u32 mask = 0, mode = 0;
+
+	mask = 0;
+	if (attr->ia_valid & ATTR_MTIME) {
+		mask |= AFS_SET_MTIME;
+		mtime = linux_to_yfs_time(&attr->ia_mtime);
+	}
+
+	if (attr->ia_valid & ATTR_UID) {
+		mask |= AFS_SET_OWNER;
+		owner = from_kuid(&init_user_ns, attr->ia_uid);
+	}
+
+	if (attr->ia_valid & ATTR_GID) {
+		mask |= AFS_SET_GROUP;
+		group = from_kgid(&init_user_ns, attr->ia_gid);
+	}
+
+	if (attr->ia_valid & ATTR_MODE) {
+		mask |= AFS_SET_MODE;
+		mode = attr->ia_mode & S_IALLUGO;
+	}
+
+	x->mask		= htonl(mask);
+	x->mode		= htonl(mode);
+	x->mtime_client	= u64_to_xdr(mtime);
+	x->owner	= u64_to_xdr(owner);
+	x->group	= u64_to_xdr(group);
+	return bp + xdr_size(x);
+}
+
+/*
+ * Decode a YFSFetchVolumeStatus block.
+ */
+static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp;
+	u32 flags;
+
+	vs->vid			= xdr_to_u64(x->vid);
+	vs->parent_id		= xdr_to_u64(x->parent_id);
+	flags			= ntohl(x->flags);
+	vs->online		= flags & yfs_FVSOnline;
+	vs->in_service		= flags & yfs_FVSInservice;
+	vs->blessed		= flags & yfs_FVSBlessed;
+	vs->needs_salvage	= flags & yfs_FVSNeedsSalvage;
+	vs->type		= ntohl(x->type);
+	vs->min_quota		= 0;
+	vs->max_quota		= xdr_to_u64(x->max_quota);
+	vs->blocks_in_use	= xdr_to_u64(x->blocks_in_use);
+	vs->part_blocks_avail	= xdr_to_u64(x->part_blocks_avail);
+	vs->part_max_blocks	= xdr_to_u64(x->part_max_blocks);
+	vs->vol_copy_date	= xdr_to_u64(x->vol_copy_date);
+	vs->vol_backup_date	= xdr_to_u64(x->vol_backup_date);
+	*_bp += sizeof(*x) / sizeof(__be32);
+}
+
+/*
+ * Deliver reply data to operations that just return a file status and a volume
+ * sync record.
+ */
+static int yfs_deliver_status_and_volsync(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	xdr_decode_YFSFetchStatus(&bp, call, &op->file[0].scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Deliver reply data to an YFS.FetchData64.
+ */
+static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
+	struct afs_vnode_param *vp = &op->file[0];
+	const __be32 *bp;
+	size_t count_before;
+	int ret;
+
+	_enter("{%u,%zu, %zu/%llu}",
+	       call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+	       call->remaining);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->remaining = 0;
+		afs_extract_to_tmp64(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the returned data length into ->actual_len.  This
+		 * may indicate more or less data than was requested will be
+		 * returned.
+		 */
+	case 1:
+		_debug("extract data length");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->remaining = be64_to_cpu(call->tmp64);
+		_debug("DATA length: %llu", call->remaining);
+
+		if (call->remaining == 0)
+			goto no_more_data;
+
+		call->iter = &subreq->io_iter;
+		call->iov_len = min(call->remaining, subreq->len - subreq->transferred);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the returned data */
+	case 2:
+		count_before = call->iov_len;
+		_debug("extract data %zu/%llu", count_before, call->remaining);
+
+		ret = afs_extract_data(call, true);
+		subreq->transferred += count_before - call->iov_len;
+		if (ret < 0)
+			return ret;
+
+		call->iter = &call->def_iter;
+		if (call->remaining)
+			goto no_more_data;
+
+		/* Discard any excess data the server gave us */
+		afs_extract_discard(call, call->remaining);
+		call->unmarshall = 3;
+		fallthrough;
+
+	case 3:
+		_debug("extract discard %zu/%llu",
+		       iov_iter_count(call->iter), call->remaining);
+
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+	no_more_data:
+		call->unmarshall = 4;
+		afs_extract_to_buf(call,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+		fallthrough;
+
+		/* extract the metadata */
+	case 4:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+		xdr_decode_YFSCallBack(&bp, call, &vp->scb);
+		xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+		if (subreq->start + subreq->transferred >= vp->scb.status.size)
+			__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 5:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.FetchData64 operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchData64 = {
+	.name		= "YFS.FetchData64",
+	.op		= yfs_FS_FetchData64,
+	.async_rx	= afs_fetch_data_async_rx,
+	.deliver	= yfs_deliver_fs_fetch_data64,
+	.immediate_cancel = afs_fetch_data_immediate_cancel,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch data from a file.
+ */
+void yfs_fs_fetch_data(struct afs_operation *op)
+{
+	struct netfs_io_subrequest *subreq = op->fetch.subreq;
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},%llx,%zx",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode,
+	       subreq->start + subreq->transferred,
+	       subreq->len   - subreq->transferred);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_u64) * 2,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	if (op->flags & AFS_OPERATION_ASYNC)
+		call->async = true;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_u64(bp, subreq->start + subreq->transferred);
+	bp = xdr_encode_u64(bp, subreq->len   - subreq->transferred);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data for YFS.CreateFile or YFS.MakeDir.
+ */
+static int yfs_deliver_fs_create_vnode(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, &op->file[1].fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_YFSCallBack(&bp, call, &vp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.CreateFile and FS.MakeDir operation type
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+	.name		= "YFS.CreateFile",
+	.op		= yfs_FS_CreateFile,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Create a file.
+ */
+void yfs_fs_create_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	reqsz = (sizeof(__be32) +
+		 sizeof(__be32) +
+		 sizeof(struct yfs_xdr_YFSFid) +
+		 xdr_strlen(name->len) +
+		 sizeof(struct yfs_xdr_YFSStoreStatus) +
+		 sizeof(__be32));
+	rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSCallBack) +
+		 sizeof(struct yfs_xdr_YFSVolSync));
+
+	call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile, reqsz, rplsz);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSCREATEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
+	bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+static const struct afs_call_type yfs_RXFSMakeDir = {
+	.name		= "YFS.MakeDir",
+	.op		= yfs_FS_MakeDir,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Make a directory.
+ */
+void yfs_fs_make_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	reqsz = (sizeof(__be32) +
+		 sizeof(struct yfs_xdr_RPCFlags) +
+		 sizeof(struct yfs_xdr_YFSFid) +
+		 xdr_strlen(name->len) +
+		 sizeof(struct yfs_xdr_YFSStoreStatus));
+	rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSCallBack) +
+		 sizeof(struct yfs_xdr_YFSVolSync));
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXFSMakeDir, reqsz, rplsz);
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSMAKEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime);
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile2 operation.
+ */
+static int yfs_deliver_fs_remove_file2(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_fid fid;
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_YFSFid(&bp, &fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+	/* Was deleted if vnode->status.abort_code == VNOVNODE. */
+
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	return 0;
+}
+
+static void yfs_done_fs_remove_file2(struct afs_call *call)
+{
+	if (call->error == -ECONNABORTED &&
+	    (call->abort_code == RX_INVALID_OPERATION ||
+	     call->abort_code == RXGEN_OPCODE)) {
+		set_bit(AFS_SERVER_FL_NO_RM2, &call->op->server->flags);
+		call->op->flags |= AFS_OPERATION_DOWNGRADE;
+	}
+}
+
+/*
+ * YFS.RemoveFile2 operation type.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
+	.name		= "YFS.RemoveFile2",
+	.op		= yfs_FS_RemoveFile2,
+	.deliver	= yfs_deliver_fs_remove_file2,
+	.done		= yfs_done_fs_remove_file2,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file and retrieve new file status.
+ */
+void yfs_fs_remove_file2(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile2,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation.
+ */
+static int yfs_deliver_fs_remove(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	return 0;
+}
+
+/*
+ * FS.RemoveDir and FS.RemoveFile operation types.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile = {
+	.name		= "YFS.RemoveFile",
+	.op		= yfs_FS_RemoveFile,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file.
+ */
+void yfs_fs_remove_file(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	if (!test_bit(AFS_SERVER_FL_NO_RM2, &op->server->flags))
+		return yfs_fs_remove_file2(op);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+static const struct afs_call_type yfs_RXYFSRemoveDir = {
+	.name		= "YFS.RemoveDir",
+	.op		= yfs_FS_RemoveDir,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Remove a directory.
+ */
+void yfs_fs_remove_dir(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveDir,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Link operation.
+ */
+static int yfs_deliver_fs_link(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Link operation type.
+ */
+static const struct afs_call_type yfs_RXYFSLink = {
+	.name		= "YFS.Link",
+	.op		= yfs_FS_Link,
+	.deliver	= yfs_deliver_fs_link,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Make a hard link.
+ */
+void yfs_fs_link(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSLink,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(name->len) +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call1(call, &vp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Symlink operation.
+ */
+static int yfs_deliver_fs_symlink(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, &vp->fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Symlink operation type
+ */
+static const struct afs_call_type yfs_RXYFSSymlink = {
+	.name		= "YFS.Symlink",
+	.op		= yfs_FS_Symlink,
+	.deliver	= yfs_deliver_fs_symlink,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Create a symbolic link.
+ */
+void yfs_fs_symlink(struct afs_operation *op)
+{
+	const struct qstr *name = &op->dentry->d_name;
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_call *call;
+	size_t contents_sz;
+	mode_t mode = 0777;
+	__be32 *bp;
+
+	_enter("");
+
+	contents_sz = strlen(op->create.symlink);
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(name->len) +
+				   xdr_strlen(contents_sz) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus),
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSYMLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_name(bp, name);
+	bp = xdr_encode_string(bp, op->create.symlink, contents_sz);
+	bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
+	yfs_check_req(call, bp);
+
+	call->fid = dvp->fid;
+	trace_afs_make_fs_call1(call, &dvp->fid, name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename operation.
+ */
+static int yfs_deliver_fs_rename(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Rename operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename = {
+	.name		= "FS.Rename",
+	.op		= yfs_FS_Rename,
+	.deliver	= yfs_deliver_fs_rename,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory.
+ */
+void yfs_fs_rename(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	if (!test_bit(AFS_SERVER_FL_NO_RENAME2, &op->server->flags))
+		return yfs_fs_rename_replace(op);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(orig_name->len) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(new_name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+	bp = xdr_encode_name(bp, orig_name);
+	bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+	bp = xdr_encode_name(bp, new_name);
+	yfs_check_req(call, bp);
+
+	call->fid = orig_dvp->fid;
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename_NoReplace operation.  This does not
+ * return the status of a displaced target inode as there cannot be one.
+ */
+static int yfs_deliver_fs_rename_1(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	struct afs_vnode_param *old_vp = &op->more_files[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb);
+	xdr_decode_YFSFid(&bp, &old_vp->fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &old_vp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Deliver reply data to a YFS.Rename_Replace or a YFS.Rename_Exchange
+ * operation.  These return the status of the displaced target inode if there
+ * was one.
+ */
+static int yfs_deliver_fs_rename_2(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	struct afs_vnode_param *old_vp = &op->more_files[0];
+	struct afs_vnode_param *new_vp = &op->more_files[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	bp = call->buffer;
+	/* If the two dirs are the same, we have two copies of the same status
+	 * report, so we just decode it twice.
+	 */
+	xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb);
+	xdr_decode_YFSFid(&bp, &old_vp->fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &old_vp->scb);
+	xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb);
+	xdr_decode_YFSFid(&bp, &new_vp->fid);
+	xdr_decode_YFSFetchStatus(&bp, call, &new_vp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void yfs_done_fs_rename_replace(struct afs_call *call)
+{
+	if (call->error == -ECONNABORTED &&
+	    (call->abort_code == RX_INVALID_OPERATION ||
+	     call->abort_code == RXGEN_OPCODE)) {
+		set_bit(AFS_SERVER_FL_NO_RENAME2, &call->op->server->flags);
+		call->op->flags |= AFS_OPERATION_DOWNGRADE;
+	}
+}
+
+/*
+ * YFS.Rename_Replace operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_Replace = {
+	.name		= "FS.Rename_Replace",
+	.op		= yfs_FS_Rename_Replace,
+	.deliver	= yfs_deliver_fs_rename_2,
+	.done		= yfs_done_fs_rename_replace,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.Rename_NoReplace operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_NoReplace = {
+	.name		= "FS.Rename_NoReplace",
+	.op		= yfs_FS_Rename_NoReplace,
+	.deliver	= yfs_deliver_fs_rename_1,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.Rename_Exchange operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename_Exchange = {
+	.name		= "FS.Rename_Exchange",
+	.op		= yfs_FS_Rename_Exchange,
+	.deliver	= yfs_deliver_fs_rename_2,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory, replacing the target if it exists.  The status
+ * of a displaced target is returned.
+ */
+void yfs_fs_rename_replace(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_Replace,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(orig_name->len) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(new_name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* Marshall the parameters. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME_REPLACE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+	bp = xdr_encode_name(bp, orig_name);
+	bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+	bp = xdr_encode_name(bp, new_name);
+	yfs_check_req(call, bp);
+
+	call->fid = orig_dvp->fid;
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Rename a file or directory, failing if the target dirent exists.
+ */
+void yfs_fs_rename_noreplace(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_NoReplace,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(orig_name->len) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(new_name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* Marshall the parameters. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME_NOREPLACE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+	bp = xdr_encode_name(bp, orig_name);
+	bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+	bp = xdr_encode_name(bp, new_name);
+	yfs_check_req(call, bp);
+
+	call->fid = orig_dvp->fid;
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Exchange a pair of files directories.
+ */
+void yfs_fs_rename_exchange(struct afs_operation *op)
+{
+	struct afs_vnode_param *orig_dvp = &op->file[0];
+	struct afs_vnode_param *new_dvp = &op->file[1];
+	const struct qstr *orig_name = &op->dentry->d_name;
+	const struct qstr *new_name = &op->dentry_2->d_name;
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename_Exchange,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(orig_name->len) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(new_name->len),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* Marshall the parameters. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME_EXCHANGE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvp->fid);
+	bp = xdr_encode_name(bp, orig_name);
+	bp = xdr_encode_YFSFid(bp, &new_dvp->fid);
+	bp = xdr_encode_name(bp, new_name);
+	yfs_check_req(call, bp);
+
+	call->fid = orig_dvp->fid;
+	trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.StoreData64 operation type.
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64 = {
+	.name		= "YFS.StoreData64",
+	.op		= yfs_FS_StoreData64,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Store a set of pages to a large file.
+ */
+void yfs_fs_store_data(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	_debug("size %llx, at %llx, i_size %llx",
+	       (unsigned long long)op->store.size,
+	       (unsigned long long)op->store.pos,
+	       (unsigned long long)op->store.i_size);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64,
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus) +
+				   sizeof(struct yfs_xdr_u64) * 3,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	call->write_iter = op->store.write_iter;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFSStoreStatus(bp, NULL, &op->mtime);
+	bp = xdr_encode_u64(bp, op->store.pos);
+	bp = xdr_encode_u64(bp, op->store.size);
+	bp = xdr_encode_u64(bp, op->store.i_size);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.StoreStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSStoreStatus = {
+	.name		= "YFS.StoreStatus",
+	.op		= yfs_FS_StoreStatus,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
+	.name		= "YFS.StoreData64",
+	.op		= yfs_FS_StoreData64,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 rather than
+ * YFS.StoreStatus so as to alter the file size also.
+ */
+static void yfs_fs_setattr_size(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	struct iattr *attr = op->setattr.attr;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64_as_Status,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus) +
+				   sizeof(struct yfs_xdr_u64) * 3,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	bp = xdr_encode_u64(bp, attr->ia_size);	/* position of start of write */
+	bp = xdr_encode_u64(bp, 0);		/* size of write */
+	bp = xdr_encode_u64(bp, attr->ia_size);	/* new file length */
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 if there's a change in
+ * file size, and YFS.StoreStatus otherwise.
+ */
+void yfs_fs_setattr(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	struct iattr *attr = op->setattr.attr;
+	__be32 *bp;
+
+	if (attr->ia_valid & ATTR_SIZE)
+		return yfs_fs_setattr_size(op);
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTORESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to a YFS.GetVolumeStatus operation.
+ */
+static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	const __be32 *bp;
+	char *p;
+	u32 size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->unmarshall++;
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+		fallthrough;
+
+		/* extract the returned status record */
+	case 1:
+		_debug("extract status");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSFetchVolumeStatus(&bp, &op->volstatus.vs);
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+		fallthrough;
+
+		/* extract the volume name length */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("volname length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, afs_eproto_volname_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the volume name */
+	case 3:
+		_debug("extract volname");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		p = call->buffer;
+		p[call->count] = 0;
+		_debug("volname '%s'", p);
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the offline message length */
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("offline msg length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, afs_eproto_offline_msg_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the offline message */
+	case 5:
+		_debug("extract offline");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		p = call->buffer;
+		p[call->count] = 0;
+		_debug("offline '%s'", p);
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the message of the day length */
+	case 6:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("motd length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, afs_eproto_motd_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_to_buf(call, size);
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the message of the day */
+	case 7:
+		_debug("extract motd");
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		p = call->buffer;
+		p[call->count] = 0;
+		_debug("motd '%s'", p);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 8:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.GetVolumeStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
+	.name		= "YFS.GetVolumeStatus",
+	.op		= yfs_FS_GetVolumeStatus,
+	.deliver	= yfs_deliver_fs_get_volume_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * fetch the status of a volume
+ */
+void yfs_fs_get_volume_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSGetVolumeStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_u64),
+				   max_t(size_t,
+					 sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+					 sizeof(__be32),
+					 AFSOPAQUEMAX + 1));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_u64(bp, vp->fid.vid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * YFS.SetLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSSetLock = {
+	.name		= "YFS.SetLock",
+	.op		= yfs_FS_SetLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.done		= afs_lock_op_done,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ExtendLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSExtendLock = {
+	.name		= "YFS.ExtendLock",
+	.op		= yfs_FS_ExtendLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.done		= afs_lock_op_done,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ReleaseLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSReleaseLock = {
+	.name		= "YFS.ReleaseLock",
+	.op		= yfs_FS_ReleaseLock,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Set a lock on a file
+ */
+void yfs_fs_set_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSSetLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(__be32),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSETLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_u32(bp, op->lock.type);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_calli(call, &vp->fid, op->lock.type);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * extend a lock on a file
+ */
+void yfs_fs_extend_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSExtendLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSEXTENDLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * release a lock on a file
+ */
+void yfs_fs_release_lock(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSReleaseLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRELEASELOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver a reply to YFS.FetchStatus
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+	xdr_decode_YFSCallBack(&bp, call, &vp->scb);
+	xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus = {
+	.name		= "YFS.FetchStatus",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a fid without needing a vnode handle.
+ */
+void yfs_fs_fetch_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an YFS.InlineBulkStatus call
+ */
+static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_status_cb *scb;
+	const __be32 *bp;
+	u32 tmp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the file status count and array in two steps */
+	case 1:
+		_debug("extract status count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("status count: %u/%u", tmp, op->nr_files);
+		if (tmp != op->nr_files)
+			return afs_protocol_error(call, afs_eproto_ibulkst_count);
+
+		call->count = 0;
+		call->unmarshall++;
+	more_counts:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
+		fallthrough;
+
+	case 2:
+		_debug("extract status array %u", call->count);
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		switch (call->count) {
+		case 0:
+			scb = &op->file[0].scb;
+			break;
+		case 1:
+			scb = &op->file[1].scb;
+			break;
+		default:
+			scb = &op->more_files[call->count - 2].scb;
+			break;
+		}
+
+		bp = call->buffer;
+		xdr_decode_YFSFetchStatus(&bp, call, scb);
+
+		call->count++;
+		if (call->count < op->nr_files)
+			goto more_counts;
+
+		call->count = 0;
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+		fallthrough;
+
+		/* Extract the callback count and array in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != op->nr_files)
+			return afs_protocol_error(call, afs_eproto_ibulkst_cb_count);
+		call->count = 0;
+		call->unmarshall++;
+	more_cbs:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
+		fallthrough;
+
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall CB array");
+		switch (call->count) {
+		case 0:
+			scb = &op->file[0].scb;
+			break;
+		case 1:
+			scb = &op->file[1].scb;
+			break;
+		default:
+			scb = &op->more_files[call->count - 2].scb;
+			break;
+		}
+
+		bp = call->buffer;
+		xdr_decode_YFSCallBack(&bp, call, scb);
+		call->count++;
+		if (call->count < op->nr_files)
+			goto more_cbs;
+
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
+		call->unmarshall++;
+		fallthrough;
+
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.InlineBulkStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
+	.name		= "YFS.InlineBulkStatus",
+	.op		= yfs_FS_InlineBulkStatus,
+	.deliver	= yfs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 1024 files
+ */
+void yfs_fs_inline_bulk_status(struct afs_operation *op)
+{
+	struct afs_vnode_param *dvp = &op->file[0];
+	struct afs_vnode_param *vp = &op->file[1];
+	struct afs_call *call;
+	__be32 *bp;
+	int i;
+
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSInlineBulkStatus,
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_YFSFid) * op->nr_files,
+				   sizeof(struct yfs_xdr_YFSFetchStatus));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPCFlags */
+	bp = xdr_encode_u32(bp, op->nr_files);
+	bp = xdr_encode_YFSFid(bp, &dvp->fid);
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	for (i = 0; i < op->nr_files - 2; i++)
+		bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_NOFS);
+}
+
+/*
+ * Deliver reply data to an YFS.FetchOpaqueACL.
+ */
+static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
+{
+	struct afs_operation *op = call->op;
+	struct afs_vnode_param *vp = &op->file[0];
+	struct yfs_acl *yacl = op->yacl;
+	struct afs_acl *acl;
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the file ACL length */
+	case 1:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		if (yacl->flags & YFS_ACL_WANT_ACL) {
+			acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+			if (!acl)
+				return -ENOMEM;
+			yacl->acl = acl;
+			acl->size = call->count2;
+			afs_extract_begin(call, acl->data, size);
+		} else {
+			afs_extract_discard(call, size);
+		}
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the file ACL */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the volume ACL length */
+	case 3:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		size = call->count2 = ntohl(call->tmp);
+		size = round_up(size, 4);
+
+		if (yacl->flags & YFS_ACL_WANT_VOL_ACL) {
+			acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
+			if (!acl)
+				return -ENOMEM;
+			yacl->vol_acl = acl;
+			acl->size = call->count2;
+			afs_extract_begin(call, acl->data, size);
+		} else {
+			afs_extract_discard(call, size);
+		}
+		call->unmarshall++;
+		fallthrough;
+
+		/* Extract the volume ACL */
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		afs_extract_to_buf(call,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+		call->unmarshall++;
+		fallthrough;
+
+		/* extract the metadata */
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		yacl->inherit_flag = ntohl(*bp++);
+		yacl->num_cleaned = ntohl(*bp++);
+		xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+		xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+		call->unmarshall++;
+		fallthrough;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+void yfs_free_opaque_acl(struct yfs_acl *yacl)
+{
+	if (yacl) {
+		kfree(yacl->acl);
+		kfree(yacl->vol_acl);
+		kfree(yacl);
+	}
+}
+
+/*
+ * YFS.FetchOpaqueACL operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = {
+	.name		= "YFS.FetchOpaqueACL",
+	.op		= yfs_FS_FetchOpaqueACL,
+	.deliver	= yfs_deliver_fs_fetch_opaque_acl,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the YFS advanced ACLs for a file.
+ */
+void yfs_fs_fetch_opaque_acl(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchOpaqueACL,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHOPAQUEACL);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}
+
+/*
+ * YFS.StoreOpaqueACL2 operation type
+ */
+static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = {
+	.name		= "YFS.StoreOpaqueACL2",
+	.op		= yfs_FS_StoreOpaqueACL2,
+	.deliver	= yfs_deliver_status_and_volsync,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the YFS ACL for a file.
+ */
+void yfs_fs_store_opaque_acl2(struct afs_operation *op)
+{
+	struct afs_vnode_param *vp = &op->file[0];
+	struct afs_call *call;
+	struct afs_acl *acl = op->acl;
+	size_t size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(op->key), vp->fid.vid, vp->fid.vnode);
+
+	size = round_up(acl->size, 4);
+	call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreOpaqueACL2,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(__be32) + size,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return afs_op_nomem(op);
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREOPAQUEACL2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vp->fid);
+	bp = xdr_encode_u32(bp, acl->size);
+	memcpy(bp, acl->data, acl->size);
+	if (acl->size != size)
+		memset((void *)bp + acl->size, 0, size - acl->size);
+	bp += size / sizeof(__be32);
+	yfs_check_req(call, bp);
+
+	call->fid = vp->fid;
+	trace_afs_make_fs_call(call, &vp->fid);
+	afs_make_op_call(op, call, GFP_KERNEL);
+}